|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "code", |
| 5 | + "execution_count": 1, |
| 6 | + "metadata": {}, |
| 7 | + "outputs": [], |
| 8 | + "source": [ |
| 9 | + "%load_ext autoreload\n", |
| 10 | + "%autoreload 2" |
| 11 | + ] |
| 12 | + }, |
| 13 | + { |
| 14 | + "cell_type": "code", |
| 15 | + "execution_count": null, |
| 16 | + "metadata": {}, |
| 17 | + "outputs": [], |
| 18 | + "source": [ |
| 19 | + "from bechdelai.data.youtube import download_youtube_video" |
| 20 | + ] |
| 21 | + }, |
| 22 | + { |
| 23 | + "cell_type": "code", |
| 24 | + "execution_count": null, |
| 25 | + "metadata": {}, |
| 26 | + "outputs": [], |
| 27 | + "source": [ |
| 28 | + "youtube_trailer_url = \"https://www.youtube.com/watch?v=EzWIsGqeoVQ\"\n", |
| 29 | + "output_filename = \"raid.mp4\"\n", |
| 30 | + "youtube_language = \"fr-FR\"" |
| 31 | + ] |
| 32 | + }, |
| 33 | + { |
| 34 | + "cell_type": "code", |
| 35 | + "execution_count": 4, |
| 36 | + "metadata": {}, |
| 37 | + "outputs": [ |
| 38 | + { |
| 39 | + "name": "stdout", |
| 40 | + "output_type": "stream", |
| 41 | + "text": [ |
| 42 | + "Task Completed!\n" |
| 43 | + ] |
| 44 | + } |
| 45 | + ], |
| 46 | + "source": [ |
| 47 | + "download_youtube_video(youtube_trailer_url, output_filename, youtube_language)" |
| 48 | + ] |
| 49 | + }, |
| 50 | + { |
| 51 | + "cell_type": "code", |
| 52 | + "execution_count": 2, |
| 53 | + "metadata": {}, |
| 54 | + "outputs": [ |
| 55 | + { |
| 56 | + "name": "stderr", |
| 57 | + "output_type": "stream", |
| 58 | + "text": [ |
| 59 | + "/home/thomas/miniconda3/envs/bechdelai/lib/python3.9/site-packages/transformers/generation/utils.py:1273: UserWarning: Neither `max_length` nor `max_new_tokens` has been set, `max_length` will default to 448 (`generation_config.max_length`). Controlling `max_length` via the config is deprecated and `max_length` will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.\n", |
| 60 | + " warnings.warn(\n" |
| 61 | + ] |
| 62 | + }, |
| 63 | + { |
| 64 | + "data": { |
| 65 | + "text/plain": [ |
| 66 | + "{'text': \" Leur Raid, l'élite de la police, des super-agences sur-entraînées. Leur devise, servir sans faillir. Suivant. Bonjour. Ah, c'est pour le secrétairelien, l'infirmiereau ou la cantine ? Je suis là pour le groupe d'intervention du Raid. C'est un danger pour le groupe, une gonzesse. Une femme froissale. Ça crie, ça chiale, ça se pète les ondes, ça se poince les cheveux dans le casque. Bon, on a une femme, c'est comme ça. Oh non ! Puis ça, tout le temps, envie de pisser. On n'a pas les combinaisons adaptées pour les pisseuses. Effectivement, vous n'êtes pas du tout macho. Je me suis trompé. Mais ? Vous êtes 16. Y a 4 lits par chambre. Faites-moi tout de suite 4 groupes de 4, s'il vous plaît. Alors ça, c'est un groupe de 16. I've been looking for her file. She's got very high quality. Do you know that? That's annoying. Hide it. And the fact that she wears the same name as the Minister of the Interior? But it has nothing to do with it. It's her daughter, but it has no relationship. I warn you, on the first occasion, I'll fire her. We have to go get her. No, it's good, look, she's still making bubbles. En casion, je la fiers. Il faut aller la chercher là. Non, c'est beau, regarde, elle fait encore des bulles. Pardon ! Elle a, elle est éliminée, elle a... Ah bah non. L'ennemi est neutralisé là. She's finished. Oh, no. The enemy is neutralized.\",\n", |
| 67 | + " 'chunks': [{'text': \" Leur Raid, l'élite de la police, des super-agences sur-entraînées.\",\n", |
| 68 | + " 'timestamp': (0.0, 5.0)},\n", |
| 69 | + " {'text': ' Leur devise, servir sans faillir.', 'timestamp': (5.0, 8.0)},\n", |
| 70 | + " {'text': ' Suivant.', 'timestamp': (8.0, 9.0)},\n", |
| 71 | + " {'text': ' Bonjour.', 'timestamp': (9.0, 10.0)},\n", |
| 72 | + " {'text': \" Ah, c'est pour le secrétairelien, l'infirmiereau ou la cantine ?\",\n", |
| 73 | + " 'timestamp': (10.0, 12.0)},\n", |
| 74 | + " {'text': \" Je suis là pour le groupe d'intervention du Raid.\",\n", |
| 75 | + " 'timestamp': (12.0, 14.0)},\n", |
| 76 | + " {'text': \" C'est un danger pour le groupe, une gonzesse.\",\n", |
| 77 | + " 'timestamp': (14.0, 16.0)},\n", |
| 78 | + " {'text': ' Une femme froissale.', 'timestamp': (16.0, 17.0)},\n", |
| 79 | + " {'text': ' Ça crie, ça chiale, ça se pète les ondes, ça se poince les cheveux dans le casque.',\n", |
| 80 | + " 'timestamp': (17.0, 22.0)},\n", |
| 81 | + " {'text': \" Bon, on a une femme, c'est comme ça.\", 'timestamp': (22.0, 24.0)},\n", |
| 82 | + " {'text': ' Oh non ! Puis ça, tout le temps, envie de pisser.',\n", |
| 83 | + " 'timestamp': (24.0, 26.0)},\n", |
| 84 | + " {'text': \" On n'a pas les combinaisons adaptées pour les pisseuses.\",\n", |
| 85 | + " 'timestamp': (26.0, 28.0)},\n", |
| 86 | + " {'text': \" Effectivement, vous n'êtes pas du tout macho.\",\n", |
| 87 | + " 'timestamp': (28.0, 30.0)},\n", |
| 88 | + " {'text': ' Je me suis trompé.', 'timestamp': (30.0, 31.0)},\n", |
| 89 | + " {'text': ' Mais ?', 'timestamp': (31.0, 32.0)},\n", |
| 90 | + " {'text': ' Vous êtes 16.', 'timestamp': (32.0, 33.0)},\n", |
| 91 | + " {'text': ' Y a 4 lits par chambre.', 'timestamp': (33.0, 35.0)},\n", |
| 92 | + " {'text': \" Faites-moi tout de suite 4 groupes de 4, s'il vous plaît.\",\n", |
| 93 | + " 'timestamp': (35.0, 37.0)},\n", |
| 94 | + " {'text': \" Alors ça, c'est un groupe de 16.\", 'timestamp': (37.0, 42.0)},\n", |
| 95 | + " {'text': \" I've been looking for her file. She's got very high quality.\",\n", |
| 96 | + " 'timestamp': (42.0, 45.0)},\n", |
| 97 | + " {'text': ' Do you know that?', 'timestamp': (45.0, 46.0)},\n", |
| 98 | + " {'text': \" That's annoying.\", 'timestamp': (46.0, 47.0)},\n", |
| 99 | + " {'text': ' Hide it.', 'timestamp': (47.0, 48.0)},\n", |
| 100 | + " {'text': ' And the fact that she wears the same name as the Minister of the Interior?',\n", |
| 101 | + " 'timestamp': (48.0, 50.0)},\n", |
| 102 | + " {'text': ' But it has nothing to do with it.', 'timestamp': (50.0, 51.0)},\n", |
| 103 | + " {'text': \" It's her daughter, but it has no relationship.\",\n", |
| 104 | + " 'timestamp': (51.0, 53.0)},\n", |
| 105 | + " {'text': \" I warn you, on the first occasion, I'll fire her.\",\n", |
| 106 | + " 'timestamp': (53.0, 55.0)},\n", |
| 107 | + " {'text': ' We have to go get her.', 'timestamp': (55.0, 56.0)},\n", |
| 108 | + " {'text': \" No, it's good, look, she's still making bubbles.\",\n", |
| 109 | + " 'timestamp': (56.0, 59.0)},\n", |
| 110 | + " {'text': ' En casion, je la fiers.', 'timestamp': (59.0, 60.0)},\n", |
| 111 | + " {'text': ' Il faut aller la chercher là.', 'timestamp': (60.0, 61.0)},\n", |
| 112 | + " {'text': \" Non, c'est beau, regarde, elle fait encore des bulles.\",\n", |
| 113 | + " 'timestamp': (61.0, 63.0)},\n", |
| 114 | + " {'text': ' Pardon !', 'timestamp': (63.0, 63.5)},\n", |
| 115 | + " {'text': ' Elle a, elle est éliminée, elle a...', 'timestamp': (63.5, 65.0)},\n", |
| 116 | + " {'text': ' Ah bah non.', 'timestamp': (65.0, 65.5)},\n", |
| 117 | + " {'text': \" L'ennemi est neutralisé là.\", 'timestamp': (65.5, 66.5)},\n", |
| 118 | + " {'text': \" She's finished.\", 'timestamp': (66.5, 68.3)},\n", |
| 119 | + " {'text': ' Oh, no. The enemy is neutralized.', 'timestamp': (68.3, 70.7)}]}" |
| 120 | + ] |
| 121 | + }, |
| 122 | + "execution_count": 2, |
| 123 | + "metadata": {}, |
| 124 | + "output_type": "execute_result" |
| 125 | + } |
| 126 | + ], |
| 127 | + "source": [ |
| 128 | + "from bechdelai.audio.speech_recognition import SpeechRecognition\n", |
| 129 | + "\n", |
| 130 | + "# Transcribe the file downloaded above; 'fr' is presumably the spoken-language code (TODO confirm).\n", |
| 131 | + "SpeechRecognition().transcribe(output_filename, \"fr\")" |
| 132 | + ] |
| 133 | + } |
| 134 | + ], |
| 135 | + "metadata": { |
| 136 | + "kernelspec": { |
| 137 | + "display_name": "bechdelai", |
| 138 | + "language": "python", |
| 139 | + "name": "python3" |
| 140 | + }, |
| 141 | + "language_info": { |
| 142 | + "codemirror_mode": { |
| 143 | + "name": "ipython", |
| 144 | + "version": 3 |
| 145 | + }, |
| 146 | + "file_extension": ".py", |
| 147 | + "mimetype": "text/x-python", |
| 148 | + "name": "python", |
| 149 | + "nbconvert_exporter": "python", |
| 150 | + "pygments_lexer": "ipython3", |
| 151 | + "version": "3.9.16" |
| 152 | + }, |
| 153 | + "orig_nbformat": 4, |
| 154 | + "vscode": { |
| 155 | + "interpreter": { |
| 156 | + "hash": "31ffc711ab2ee07bd298f523dc1dd63ebc15cb1e136e0e7de381fff9c93dfdff" |
| 157 | + } |
| 158 | + } |
| 159 | + }, |
| 160 | + "nbformat": 4, |
| 161 | + "nbformat_minor": 2 |
| 162 | +} |
0 commit comments