From d89647351b3d2377dc3596e7e09f7395939c1509 Mon Sep 17 00:00:00 2001 From: TheAIWizard Date: Mon, 25 Aug 2025 00:02:41 +0000 Subject: [PATCH 1/9] update nace2025 training data path --- .github/workflows/build-image.yaml | 2 +- argo-workflows/train-workflow.yaml | 2 +- src/utils/data.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-image.yaml b/.github/workflows/build-image.yaml index e49fa16..9915945 100644 --- a/.github/workflows/build-image.yaml +++ b/.github/workflows/build-image.yaml @@ -3,7 +3,7 @@ name: Image Build on: push: branches: - - naf2008 + - fasttext-meta tags: - "*" paths: diff --git a/argo-workflows/train-workflow.yaml b/argo-workflows/train-workflow.yaml index ec5fc5f..5c2dd61 100644 --- a/argo-workflows/train-workflow.yaml +++ b/argo-workflows/train-workflow.yaml @@ -75,7 +75,7 @@ spec: image: inseefrlab/codif-ape-train:naf2008 imagePullPolicy: Always command: ["/bin/bash", -c] - args: ["git clone -b naf2008 https://github.com/InseeFrLab/codif-ape-train.git &&\ + args: ["git clone -b fasttext-meta https://github.com/InseeFrLab/codif-ape-train.git &&\ cd codif-ape-train/ &&\ export MLFLOW_EXPERIMENT_NAME={{inputs.parameters.EXPERIMENT_NAME}} &&\ mlflow run ~/work/codif-ape-train/ \ diff --git a/src/utils/data.py b/src/utils/data.py index 0455511..6eb3e34 100644 --- a/src/utils/data.py +++ b/src/utils/data.py @@ -46,7 +46,7 @@ def get_sirene_4_data( if revision == "NAF2008": path = "projet-ape/extractions/20241027_sirene4.parquet" elif revision == "NAF2025": - path = "projet-ape/NAF-revision/relabeled-data/20241027_sirene4_nace2025.parquet" + path = "projet-ape/extractions/domain_specific_cleaned/full_dataset_20241027_sirene4_nace2025_fuzzy_regex_similarity.parquet" else: raise ValueError("Revision must be either 'NAF2008' or 'NAF2025'.") From b835b1490855ad8f6d5d751f400c13b461b6217f Mon Sep 17 00:00:00 2001 From: TheAIWizard Date: Mon, 25 Aug 2025 00:23:03 +0000 Subject: [PATCH 2/9] update training
data path from cleansed --- src/utils/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/data.py b/src/utils/data.py index 6eb3e34..2a294d9 100644 --- a/src/utils/data.py +++ b/src/utils/data.py @@ -44,7 +44,7 @@ def get_sirene_4_data( fs = get_file_system() if revision == "NAF2008": - path = "projet-ape/extractions/20241027_sirene4.parquet" + path = "projet-ape/extractions/domain_specific_cleaned/full_dataset_20241027_sirene4_nacerev2_fuzzy_regex_similarity.parquet" elif revision == "NAF2025": path = "projet-ape/extractions/domain_specific_cleaned/full_dataset_20241027_sirene4_nace2025_fuzzy_regex_similarity.parquet" else: From c1e58e89dc38de63ef7383f04b3ffc2f7fc1f8fc Mon Sep 17 00:00:00 2001 From: TheAIWizard Date: Mon, 25 Aug 2025 00:49:49 +0000 Subject: [PATCH 3/9] manage image --- argo-workflows/train-workflow.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argo-workflows/train-workflow.yaml b/argo-workflows/train-workflow.yaml index 5c2dd61..6b51a8e 100644 --- a/argo-workflows/train-workflow.yaml +++ b/argo-workflows/train-workflow.yaml @@ -72,7 +72,7 @@ spec: - name: MODEL_CLASS - name: EXPERIMENT_NAME container: - image: inseefrlab/codif-ape-train:naf2008 + image: inseefrlab/codif-ape-train:fasttext-meta imagePullPolicy: Always command: ["/bin/bash", -c] args: ["git clone -b fasttext-meta https://github.com/InseeFrLab/codif-ape-train.git &&\ From 7a7eacf3da157cf39637d270bbc3df4fb468db3e Mon Sep 17 00:00:00 2001 From: TheAIWizard Date: Mon, 25 Aug 2025 07:41:04 +0000 Subject: [PATCH 4/9] activate github action wf --- .github/workflows/build-image.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-image.yaml b/.github/workflows/build-image.yaml index 9915945..73d80b3 100644 --- a/.github/workflows/build-image.yaml +++ b/.github/workflows/build-image.yaml @@ -11,6 +11,7 @@ on: - Dockerfile # Allows you to run this workflow manually from the Actions tab workflow_dispatch: + 
pull_request: jobs: image-build: From a7ddd717a8bd934cd4a5534b7e109f56734f28a6 Mon Sep 17 00:00:00 2001 From: TheAIWizard Date: Mon, 25 Aug 2025 07:48:08 +0000 Subject: [PATCH 5/9] activate github action wf --- .github/workflows/build-image.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-image.yaml b/.github/workflows/build-image.yaml index 73d80b3..8919fba 100644 --- a/.github/workflows/build-image.yaml +++ b/.github/workflows/build-image.yaml @@ -10,8 +10,9 @@ on: - requirements.txt - Dockerfile # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: pull_request: + workflow_dispatch: + jobs: image-build: From 34c39777ecf69f59928a35ab3c8b4506e3c2dc70 Mon Sep 17 00:00:00 2001 From: Nathan Randriamanana <37664429+TheAIWizard@users.noreply.github.com> Date: Mon, 25 Aug 2025 09:51:56 +0200 Subject: [PATCH 6/9] Create build-image.yaml --- .github/build-image.yaml | 61 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 .github/build-image.yaml diff --git a/.github/build-image.yaml b/.github/build-image.yaml new file mode 100644 index 0000000..8919fba --- /dev/null +++ b/.github/build-image.yaml @@ -0,0 +1,61 @@ +name: Image Build + +on: + push: + branches: + - fasttext-meta + tags: + - "*" + paths: + - requirements.txt + - Dockerfile + # Allows you to run this workflow manually from the Actions tab + pull_request: + workflow_dispatch: + + +jobs: + image-build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: inseefrlab/codif-ape-train + + - name: Make free space + # https://github.com/actions/virtual-environments/issues/2840 + run: | + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/share/boost "$AGENT_TOOLSDIRECTORY" + docker rmi -f $(docker images -aq) + shell: bash + + - name: Set up QEMU + uses: 
docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to DockerHub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Build and push + id: docker_build + uses: docker/build-push-action@v5 + with: + context: . + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + - name: Image digest + run: echo ${{ steps.docker_build.outputs.digest }} From 2c40d1a1beaaffb91ffec77ac58e4e13e351b539 Mon Sep 17 00:00:00 2001 From: Nathan Randriamanana <37664429+TheAIWizard@users.noreply.github.com> Date: Mon, 25 Aug 2025 09:57:30 +0200 Subject: [PATCH 7/9] Delete .github/build-image.yaml --- .github/build-image.yaml | 61 ---------------------------------------- 1 file changed, 61 deletions(-) delete mode 100644 .github/build-image.yaml diff --git a/.github/build-image.yaml b/.github/build-image.yaml deleted file mode 100644 index 8919fba..0000000 --- a/.github/build-image.yaml +++ /dev/null @@ -1,61 +0,0 @@ -name: Image Build - -on: - push: - branches: - - fasttext-meta - tags: - - "*" - paths: - - requirements.txt - - Dockerfile - # Allows you to run this workflow manually from the Actions tab - pull_request: - workflow_dispatch: - - -jobs: - image-build: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Docker meta - id: meta - uses: docker/metadata-action@v5 - with: - images: inseefrlab/codif-ape-train - - - name: Make free space - # https://github.com/actions/virtual-environments/issues/2840 - run: | - sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/share/boost "$AGENT_TOOLSDIRECTORY" - docker rmi -f $(docker images -aq) - shell: bash - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Login to DockerHub - uses: 
docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Build and push - id: docker_build - uses: docker/build-push-action@v5 - with: - context: . - push: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - - - name: Image digest - run: echo ${{ steps.docker_build.outputs.digest }} From 494905a87cae0a9d6515b967c3dddb47119e9c0e Mon Sep 17 00:00:00 2001 From: Nathan Randriamanana <37664429+TheAIWizard@users.noreply.github.com> Date: Mon, 25 Aug 2025 10:12:52 +0200 Subject: [PATCH 8/9] Update build-image.yaml --- .github/workflows/build-image.yaml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/build-image.yaml b/.github/workflows/build-image.yaml index 8919fba..fb38364 100644 --- a/.github/workflows/build-image.yaml +++ b/.github/workflows/build-image.yaml @@ -29,13 +29,6 @@ jobs: with: images: inseefrlab/codif-ape-train - - name: Make free space - # https://github.com/actions/virtual-environments/issues/2840 - run: | - sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/share/boost "$AGENT_TOOLSDIRECTORY" - docker rmi -f $(docker images -aq) - shell: bash - - name: Set up QEMU uses: docker/setup-qemu-action@v3 From cc9ef1aa597d1aad85db96dc40b9e979687eba96 Mon Sep 17 00:00:00 2001 From: Nathan Randriamanana <37664429+TheAIWizard@users.noreply.github.com> Date: Mon, 25 Aug 2025 10:19:31 +0200 Subject: [PATCH 9/9] Update requirements.txt --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index dfb4f67..50b426e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,3 @@ sentencepiece accelerate datasets evaluate -torchFastText @ git+https://github.com/inseefrlab/torch-fastText@package \ No newline at end of file