functional sockets

This commit is contained in:
JotaChina 2025-11-10 16:16:59 -03:00
parent 509e8fb609
commit 390b84d331
1252 changed files with 186897 additions and 143 deletions

View File

@@ -4,8 +4,8 @@ description: >- # this means to ignore newlines until "baseurl:"
   Write an awesome description for your new site here. You can edit this
   line in _config.yml. It will appear in your document head meta (for
   Google search results) and in your feed.xml site description.
-baseurl: "/mmpSearch/" # the subpath of your site, e.g. /blog
-url: "https://alice.ufsj.edu.br" # the base hostname & protocol for your site, e.g. http://example.com
+baseurl: "/~jotachina/mmpSearch/" # the subpath of your site, e.g. /blog
+url: "https://alice.ufsj.edu.br/" # the base hostname & protocol for your site, e.g. http://example.com
 theme: alice

View File

@@ -8,10 +8,8 @@ title: "{{ page.file }}"
 <main class="content">
   <div class="container">
-    <!-- Title -->
     <h1 class="title is-3 mb-4"><code>{{ page.file }}</code></h1>
-    <!-- Metadata (source file + BPM) -->
     {% if page.file or page.bpm %}
     <div class="mb-5">
       <div class="columns is-mobile is-multiline is-vcentered">
@@ -29,114 +27,161 @@ title: "{{ page.file }}"
       </div>
     {% endif %}
-    <!-- Tags -->
-    {% if page.tags %}
-      {% assign tags_vazias = true %}
-      {% for categoria in page.tags %}
-        {% if categoria[1].size > 0 %}
-          {% assign tags_vazias = false %}
-        {% endif %}
-      {% endfor %}
-      {% unless tags_vazias %}
-      <section class="tags-section mb-6">
-        <!-- Check whether the .wav file exists -->
-        {% assign audio_file = '/mmp/wav/' | append: page.file | append: '.wav' %}
-        <!-- Display the audio player -->
-        {% if audio_file %}
-        <section class="audio-player-section mb-6">
-          <audio controls>
-            <source src="{{ audio_file | relative_url }}" type="audio/wav">
-            Seu navegador não suporta o elemento <code>audio</code>.
-          </audio>
-        </section>
-        {% endif %}
-        <h2 class="title is-5"><code>🏷️ Tags</code></h2>
-        {% for categoria in page.tags %}
-          {% if categoria[1].size > 0 %}
-          <div class="mb-4">
-            <strong><code>{{ categoria[0] }}:</code></strong>
-            <div class="tags mt-2">
-              {% for valor in categoria[1] %}
-                {% if valor != "" %}
-                  {% assign tag_slug = valor | replace: ' ', '+' %}
-                  <!-- Build the link for filtering by category -->
-                  {% if categoria[0] == 'bassline' %}
-                    <a href="{{ '/bassline/?bassline=' | append: tag_slug | relative_url }}" class="tag is-info is-light">{{ valor }}</a>
-                  {% elsif categoria[0] == 'sample' %}
-                    <a href="{{ '/sample/?sample=' | append: tag_slug | relative_url }}" class="tag is-info is-light">{{ valor }}</a>
-                  {% elsif categoria[0] == 'plugin' %}
-                    <a href="{{ '/plugin/?plugin=' | append: tag_slug | relative_url }}" class="tag is-info is-light">{{ valor }}</a>
-                  {% elsif categoria[0] == 'automation' %}
-                    <a href="{{ '/automation/?automation=' | append: tag_slug | relative_url }}" class="tag is-info is-light">{{ valor }}</a>
-                  {% else %}
-                    <a href="{{ '/' | append: tag_slug | relative_url }}" class="tag is-info is-light">{{ valor }}</a>
-                  {% endif %}
-                {% endif %}
-              {% endfor %}
-            </div>
-          </div>
-          {% endif %}
-        {% endfor %}
-      </section>
-      {% endunless %}
-    {% endif %}
-    <!-- Instruments -->
-    {% if page.tracks and page.tracks.size > 0 %}
-    <section class="instruments-section">
-      <h2 class="title is-5"><code>🎚️ Instrumentos</code></h2>
-      <div class="content">
-        <ul style="list-style: none; padding-left: 0;">
-          {% for track in page.tracks %}
-            {% assign tem_instr = false %}
-            {% if track.instruments and track.instruments.size > 0 %}
-              {% assign tem_instr = true %}
-            {% endif %}
-            {% if track.bassline_name or tem_instr %}
-            <li class="mb-5">
-              {% if track.bassline_name %}
-                <p class="has-text-weight-bold mb-2">🎼 {{ track.bassline_name }}</p>
-              {% endif %}
-              {% if tem_instr %}
-              <ul class="pl-4">
-                {% for instrument in track.instruments %}
-                  {% if instrument.instrument_name %}
-                  <li style="margin-bottom: 0.4rem;">
-                    <!-- Build the link for filtering by instrument -->
-                    {% assign instrument_slug = instrument.instrument_name | replace: ' ', '+' %}
-                    <a href="{{ '/instruments/?instrument=' | append: instrument_slug | relative_url }}" class="tag is-info is-light">
-                      <code>{{ instrument.instrument_name }}</code>
-                    </a>
-                    {% if instrument.audiofileprocessor and instrument.audiofileprocessor.src %}
-                    <!-- Display the audio player for the instrument -->
-                    <div class="audio-player-section mt-3">
-                      <audio controls>
-                        <!-- The path looks correct, but no sound is coming out -->
-                        <source src="{{ '/mmp/instruments/lmms/' | append: instrument.audiofileprocessor.src | relative_url }}" type="audio/wav">
-                        <source src="{{ '/mmp/instruments/lmms/' | append: instrument.audiofileprocessor.src | relative_url }}" type="audio/ogg">
-                        Seu navegador não suporta o elemento <code>audio</code>.
-                      </audio>
-                    </div>
-                    {% endif %}
-                  </li>
-                  {% endif %}
-                {% endfor %}
-              </ul>
-              {% endif %}
-            </li>
-            {% endif %}
-          {% endfor %}
-        </ul>
-      </div>
-    </section>
-    {% endif %}
-  </div>
+    <div class="columns">
+      <div class="column is-two-thirds">
+        {% if page.tags %}
+          {% assign tags_vazias = true %}
+          {% for categoria in page.tags %}
+            {% if categoria[1].size > 0 %}
+              {% assign tags_vazias = false %}
+            {% endif %}
+          {% endfor %}
+          {% unless tags_vazias %}
+          <section class="tags-section mb-6">
+            {% assign audio_file = '/mmp/wav/' | append: page.file | append: '.wav' %}
+            {% if audio_file %}
+            <section class="audio-player-section mb-6">
+              <audio controls>
+                <source src="{{ audio_file | relative_url }}" type="audio/wav">
+                Seu navegador não suporta o elemento <code>audio</code>.
+              </audio>
+            </section>
+            {% endif %}
+            <h2 class="title is-5"><code>🏷️ Tags</code></h2>
+            {% for categoria in page.tags %}
+              {% if categoria[1].size > 0 %}
+              <div class="mb-4">
+                <strong><code>{{ categoria[0] }}:</code></strong>
+                <div class="tags mt-2">
+                  {% for valor in categoria[1] %}
+                    {% if valor != "" %}
+                      {% assign tag_slug = valor | replace: ' ', '+' %}
+                      {% if categoria[0] == 'bassline' %}
+                        <a href="{{ '/bassline/?bassline=' | append: tag_slug | relative_url }}" class="tag is-info is-light">{{ valor }}</a>
+                      {% elsif categoria[0] == 'sample' %}
+                        <a href="{{ '/sample/?sample=' | append: tag_slug | relative_url }}" class="tag is-info is-light">{{ valor }}</a>
+                      {% elsif categoria[0] == 'plugin' %}
+                        <a href="{{ '/plugin/?plugin=' | append: tag_slug | relative_url }}" class="tag is-info is-light">{{ valor }}</a>
+                      {% elsif categoria[0] == 'automation' %}
+                        <a href="{{ '/automation/?automation=' | append: tag_slug | relative_url }}" class="tag is-info is-light">{{ valor }}</a>
+                      {% else %}
+                        <a href="{{ '/' | append: tag_slug | relative_url }}" class="tag is-info is-light">{{ valor }}</a>
+                      {% endif %}
+                    {% endif %}
+                  {% endfor %}
+                </div>
+              </div>
+              {% endif %}
+            {% endfor %}
+          </section>
+          {% endunless %}
+        {% endif %}
+        {% if page.tracks and page.tracks.size > 0 %}
+        <section class="instruments-section">
+          <h2 class="title is-5"><code>🎚️ Instrumentos</code></h2>
+          <div class="content">
+            <ul style="list-style: none; padding-left: 0;">
+              {% for track in page.tracks %}
+                {% assign tem_instr = false %}
+                {% if track.instruments and track.instruments.size > 0 %}
+                  {% assign tem_instr = true %}
+                {% endif %}
+                {% if track.bassline_name or tem_instr %}
+                <li class="mb-5">
+                  {% if track.bassline_name %}
+                    <p class="has-text-weight-bold mb-2">🎼 {{ track.bassline_name }}</p>
+                  {% endif %}
+                  {% if tem_instr %}
+                  <ul class="pl-4">
+                    {% for instrument in track.instruments %}
+                      {% if instrument.instrument_name %}
+                      <li style="margin-bottom: 0.4rem;">
+                        {% assign instrument_slug = instrument.instrument_name | replace: ' ', '+' %}
+                        <a href="{{ '/instruments/?instrument=' | append: instrument_slug | relative_url }}" class="tag is-info is-light">
+                          <code>{{ instrument.instrument_name }}</code>
+                        </a>
+                        {% if instrument.audiofileprocessor and instrument.audiofileprocessor.src %}
+                        {% assign audio_filename_with_path = 'src/samples/' | append: instrument.audiofileprocessor.src %}
+                        <div class="audio-player-section mt-3">
+                          <audio controls>
+                            <source
+                              src="{{ audio_filename_with_path | relative_url }}"
+                              type="audio/ogg"
+                            >
+                            {% assign wav_path = audio_filename_with_path | replace: '.ogg', '.wav' %}
+                            <source
+                              src="{{ wav_path | relative_url }}"
+                              type="audio/wav"
+                            >
+                            Seu navegador não suporta o elemento <code>audio</code>.
+                          </audio>
+                        </div>
+                        {% endif %}
+                      </li>
+                      {% endif %}
+                    {% endfor %}
+                  </ul>
+                  {% endif %}
+                </li>
+                {% endif %}
+              {% endfor %}
+            </ul>
+          </div>
+        </section>
+        {% endif %}
+      </div>
+      <div class="column is-one-third">
+        <div class="box p-4 has-background-info-light">
+          <h3 class="title is-6 has-text-info mb-3">🛠️ Abra na Criação Colaborativa</h3>
+          {% comment %}
+            URL CONSTRUCTION:
+            1. Uses the source file (`page.file`) as the `project` parameter.
+            2. Assumes the creation platform lives at `/creations/`.
+            3. Opens in a new tab (`target="_blank"`).
+          {% endcomment %}
+          {% assign creation_url = '/creations/?project=' | append: page.file | relative_url %}
+          <a
+            href="{{ creation_url }}"
+            target="_blank"
+            class="button is-info is-fullwidth is-medium mb-4"
+          >
+            <span class="icon"><i class="fa-solid fa-up-right-from-square"></i></span>
+            <span>Abrir no MMPCreator</span>
+          </a>
+          <p class="is-size-7 has-text-grey-dark">
+            O link abrirá o projeto em uma nova aba para edição.
+            <br>
+            Arquivo: <code>{{ page.file }}</code>
+          </p>
+          <div class="mt-4 is-hidden-mobile">
+            <h4 class="title is-7 has-text-grey-dark">Prévia (Embed)</h4>
+            {% comment %}
+              The embedded preview is optional and can be somewhat heavy.
+              Adjust the URL path and the styling (height) as needed.
+            {% endcomment %}
+            <iframe
+              src="{{ creation_url }}"
+              title="Prévia do Projeto"
+              style="width: 100%; height: 300px; border: 1px solid #ccc; border-radius: 4px;"
+            >
+            </iframe>
+          </div>
+        </div>
+      </div>
+    </div>
+  </div>
 </main>
 </div>
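The new right-hand column and the `?project=` handler added to main.js (next file) are two halves of the same feature. As a quick illustration (the project name below is hypothetical), the rendered link and the parameter parsing line up like this:

```js
// The Liquid above renders roughly:
//   <a href="/creations/?project=my-song" target="_blank">Abrir no MMPCreator</a>
// and the handler added to main.js reads the parameter back:
const params = new URLSearchParams("?project=my-song");
console.log(params.get("project")); // "my-song"
```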

View File

@@ -25,6 +25,22 @@ import { sendAction, joinRoom, setUserName } from "./socket.js";
 const ROOM_NAME = new URLSearchParams(window.location.search).get("room");
 window.ROOM_NAME = ROOM_NAME;
 
+const PROJECT_NAME = new URLSearchParams(window.location.search).get("project");
+
+if (PROJECT_NAME) {
+  // The project name must match the file on the server, e.g. "mmp/your-saved-project.mmp".
+  // 'file.js' already expects loadProjectFromServer to receive only the name
+  // of the file inside the 'mmp/' folder (e.g. 'project-name.mmp').
+  console.log(`[MAIN] Carregando projeto do servidor: ${PROJECT_NAME}`);
+
+  // Append the extension if the link does not include it
+  const filename = PROJECT_NAME.endsWith('.mmp') || PROJECT_NAME.endsWith('.mmpz')
+    ? PROJECT_NAME
+    : `${PROJECT_NAME}.mmp`;
+
+  // Call the loader from file.js (which already sends the 'LOAD_PROJECT' action)
+  loadProjectFromServer(filename);
+}
+
 // ✅ NEW: if the URL has a room, join it right away (independent of audio)
 if (ROOM_NAME) {
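`loadProjectFromServer` itself lives in file.js, whose diff is suppressed below, so only a hedged sketch of what the comments above imply is possible; the fetch path, the action type, and the payload fields here are assumptions, not the committed implementation:

```js
// Hypothetical sketch of file.js's loader, inferred from the comments above.
// Assumes: project files are served statically from 'mmp/', and sendAction()
// (from socket.js) broadcasts the action to everyone in the room.
import { sendAction } from "./socket.js";

export async function loadProjectFromServer(filename) {
  // Fetch the raw .mmp/.mmpz project file from the server's 'mmp/' folder
  const response = await fetch(`mmp/${filename}`);
  if (!response.ok) {
    console.error(`[FILE] Could not fetch project: ${filename}`);
    return;
  }
  const projectXml = await response.text();

  // Broadcast LOAD_PROJECT so every client in the room loads the same project
  sendAction({ type: "LOAD_PROJECT", filename, projectXml });
}
```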

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,4 @@
{"level":30,"time":1762338349098,"pid":3059021,"hostname":"ubuntu","timestamp":1762338349098,"socketId":"Tc5nbWviseZsyqK6AAAV","action":{"type":"AUDIO_SNAPSHOT_REQUEST","__token":"1","__senderName":"Alicer-6578"},"msg":"action_received"}
{"level":30,"time":1762338371674,"pid":3059021,"hostname":"ubuntu","timestamp":1762338371674,"socketId":"Tc5nbWviseZsyqK6AAAV","action":{"type":"TOGGLE_PLAYBACK","__token":"2","__senderId":"Tc5nbWviseZsyqK6AAAV","__senderName":"Alicer-Tc5n","scheduleAtServerMs":1762338371762,"__syncMode":"global"},"msg":"action_received"}
{"level":30,"time":1762338372738,"pid":3059021,"hostname":"ubuntu","timestamp":1762338372738,"socketId":"Tc5nbWviseZsyqK6AAAV","action":{"type":"STOP_PLAYBACK","__token":"3","__senderId":"Tc5nbWviseZsyqK6AAAV","__senderName":"Alicer-Tc5n","__syncMode":"global"},"msg":"action_received"}
{"level":30,"time":1762338380431,"pid":3059021,"hostname":"ubuntu","timestamp":1762338380431,"socketId":"Tc5nbWviseZsyqK6AAAV","action":{"type":"SET_SEEK_TIME","seekTime":3.8409620920817056,"__token":"4","__senderId":"Tc5nbWviseZsyqK6AAAV","__senderName":"Alicer-Tc5n","__syncMode":"global"},"msg":"action_received"}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,24 @@
{"level":30,"time":1762302188117,"pid":2957627,"hostname":"ubuntu","timestamp":1762302188117,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"AUDIO_SNAPSHOT_REQUEST","__token":"1","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop"},"msg":"action_received"}
{"level":30,"time":1762302195131,"pid":2957627,"hostname":"ubuntu","timestamp":1762302195131,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"TOGGLE_PLAYBACK","__token":"2","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop","scheduleAtServerMs":1762302195320,"__syncMode":"global"},"msg":"action_received"}
{"level":30,"time":1762302196631,"pid":2957627,"hostname":"ubuntu","timestamp":1762302196631,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"STOP_PLAYBACK","__token":"3","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop","__syncMode":"global"},"msg":"action_received"}
{"level":30,"time":1762302235153,"pid":2957627,"hostname":"ubuntu","timestamp":1762302235153,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"SET_TIMESIG_B","value":"5","__token":"4","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop"},"msg":"action_received"}
{"level":30,"time":1762302236680,"pid":2957627,"hostname":"ubuntu","timestamp":1762302236680,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"SET_TIMESIG_B","value":"4","__token":"5","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop"},"msg":"action_received"}
{"level":30,"time":1762302240744,"pid":2957627,"hostname":"ubuntu","timestamp":1762302240744,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"SET_TIMESIG_A","value":"5","__token":"6","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop"},"msg":"action_received"}
{"level":30,"time":1762302242145,"pid":2957627,"hostname":"ubuntu","timestamp":1762302242145,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"SET_TIMESIG_A","value":"4","__token":"7","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop"},"msg":"action_received"}
{"level":30,"time":1762302246025,"pid":2957627,"hostname":"ubuntu","timestamp":1762302246025,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"SET_TIMESIG_A","value":"5","__token":"8","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop"},"msg":"action_received"}
{"level":30,"time":1762302248385,"pid":2957627,"hostname":"ubuntu","timestamp":1762302248385,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"SET_TIMESIG_A","value":"4","__token":"9","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop"},"msg":"action_received"}
{"level":30,"time":1762302253038,"pid":2957627,"hostname":"ubuntu","timestamp":1762302253038,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"ADD_AUDIO_LANE","trackId":"track_1762302178952_wcttlam","__token":"10","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop"},"msg":"action_received"}
{"level":30,"time":1762302256405,"pid":2957627,"hostname":"ubuntu","timestamp":1762302256405,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"SET_TIMESIG_A","value":"5","__token":"11","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop"},"msg":"action_received"}
{"level":30,"time":1762302260775,"pid":2957627,"hostname":"ubuntu","timestamp":1762302260775,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"SET_TIMESIG_A","value":"4","__token":"12","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop"},"msg":"action_received"}
{"level":30,"time":1762302262125,"pid":2957627,"hostname":"ubuntu","timestamp":1762302262125,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"SET_TIMESIG_A","value":"3","__token":"13","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop"},"msg":"action_received"}
{"level":30,"time":1762302264521,"pid":2957627,"hostname":"ubuntu","timestamp":1762302264521,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"SET_TIMESIG_A","value":"4","__token":"14","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop"},"msg":"action_received"}
{"level":30,"time":1762302276019,"pid":2957627,"hostname":"ubuntu","timestamp":1762302276019,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"SET_LOOP_STATE","isLoopActive":true,"loopStartTime":0,"loopEndTime":8,"__token":"15","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop","__syncMode":"global"},"msg":"action_received"}
{"level":30,"time":1762302304764,"pid":2957627,"hostname":"ubuntu","timestamp":1762302304764,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"SET_LOOP_STATE","isLoopActive":true,"loopStartTime":0,"loopEndTime":10.317708333333334,"__token":"16","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop","__syncMode":"global"},"msg":"action_received"}
{"level":30,"time":1762302307608,"pid":2957627,"hostname":"ubuntu","timestamp":1762302307608,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"SET_BARS","value":"5","__token":"17","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop"},"msg":"action_received"}
{"level":30,"time":1762302310602,"pid":2957627,"hostname":"ubuntu","timestamp":1762302310602,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"SET_BARS","value":"4","__token":"18","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop"},"msg":"action_received"}
{"level":30,"time":1762302317921,"pid":2957627,"hostname":"ubuntu","timestamp":1762302317921,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"SET_LOOP_STATE","isLoopActive":true,"loopStartTime":0,"loopEndTime":10.661458333333334,"__token":"19","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop","__syncMode":"global"},"msg":"action_received"}
{"level":30,"time":1762302319371,"pid":2957627,"hostname":"ubuntu","timestamp":1762302319371,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"START_AUDIO_PLAYBACK","seekTime":0,"loopState":{"isLoopActive":true,"loopStartTime":0,"loopEndTime":10.661458333333334},"__token":"20","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop","scheduleAtServerMs":1762302319558,"__syncMode":"global"},"msg":"action_received"}
{"level":30,"time":1762302646626,"pid":2957627,"hostname":"ubuntu","timestamp":1762302646626,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"SET_BARS","value":"5","__token":"21","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop"},"msg":"action_received"}
{"level":30,"time":1762302647499,"pid":2957627,"hostname":"ubuntu","timestamp":1762302647499,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"SET_BARS","value":"6","__token":"22","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop"},"msg":"action_received"}
{"level":30,"time":1762302653247,"pid":2957627,"hostname":"ubuntu","timestamp":1762302653247,"socketId":"FcopcdPtJpq9mvaJAAAL","action":{"type":"SET_BARS","value":"7","__token":"23","__senderId":"FcopcdPtJpq9mvaJAAAL","__senderName":"Alicer-Fcop"},"msg":"action_received"}
{"level":30,"time":1762302666979,"pid":2957627,"hostname":"ubuntu","timestamp":1762302666979,"socketId":"nTafb88_mh2uUp2iAAAN","action":{"type":"AUDIO_SNAPSHOT_REQUEST","__token":"1","__senderId":"nTafb88_mh2uUp2iAAAN","__senderName":"Alicer-nTaf"},"msg":"action_received"}

View File

@@ -0,0 +1,15 @@
{"level":30,"time":1762304721819,"pid":3059021,"hostname":"ubuntu","timestamp":1762304721818,"socketId":"5Aipxv7gPJTN-Z8_AAAN","action":{"type":"AUDIO_SNAPSHOT_REQUEST","__token":"1","__senderId":"5Aipxv7gPJTN-Z8_AAAN","__senderName":"Alicer-5Aip"},"msg":"action_received"}
{"level":30,"time":1762304723523,"pid":3059021,"hostname":"ubuntu","timestamp":1762304723523,"socketId":"5Aipxv7gPJTN-Z8_AAAN","action":{"type":"TOGGLE_NOTE","trackIndex":0,"patternIndex":0,"stepIndex":0,"isActive":true,"__token":"2","__senderId":"5Aipxv7gPJTN-Z8_AAAN","__senderName":"Alicer-5Aip"},"msg":"action_received"}
{"level":30,"time":1762304724302,"pid":3059021,"hostname":"ubuntu","timestamp":1762304724302,"socketId":"5Aipxv7gPJTN-Z8_AAAN","action":{"type":"TOGGLE_NOTE","trackIndex":0,"patternIndex":0,"stepIndex":0,"isActive":false,"__token":"3","__senderId":"5Aipxv7gPJTN-Z8_AAAN","__senderName":"Alicer-5Aip"},"msg":"action_received"}
{"level":30,"time":1762304847366,"pid":3059021,"hostname":"ubuntu","timestamp":1762304847366,"socketId":"4LF0oJrOV5v0tipSAAAP","action":{"type":"AUDIO_SNAPSHOT_REQUEST","__token":"1","__senderId":"4LF0oJrOV5v0tipSAAAP","__senderName":"Alicer-4LF0"},"msg":"action_received"}
{"level":30,"time":1762304848669,"pid":3059021,"hostname":"ubuntu","timestamp":1762304848669,"socketId":"4LF0oJrOV5v0tipSAAAP","action":{"type":"TOGGLE_NOTE","trackIndex":0,"patternIndex":0,"stepIndex":0,"isActive":true,"__token":"2","__senderId":"4LF0oJrOV5v0tipSAAAP","__senderName":"Alicer-4LF0"},"msg":"action_received"}
{"level":30,"time":1762304852423,"pid":3059021,"hostname":"ubuntu","timestamp":1762304852423,"socketId":"5Aipxv7gPJTN-Z8_AAAN","action":{"type":"TOGGLE_NOTE","trackIndex":0,"patternIndex":0,"stepIndex":0,"isActive":false,"__token":"4","__senderId":"5Aipxv7gPJTN-Z8_AAAN","__senderName":"Alicer-5Aip"},"msg":"action_received"}
{"level":30,"time":1762305077100,"pid":3059021,"hostname":"ubuntu","timestamp":1762305077100,"socketId":"5Aipxv7gPJTN-Z8_AAAN","action":{"type":"SET_SYNC_MODE","mode":"local","__token":"5","__senderId":"5Aipxv7gPJTN-Z8_AAAN","__senderName":"Alicer-5Aip","__syncMode":"global"},"msg":"action_received"}
{"level":30,"time":1762305084928,"pid":3059021,"hostname":"ubuntu","timestamp":1762305084928,"socketId":"5Aipxv7gPJTN-Z8_AAAN","action":{"type":"SET_LOOP_STATE","isLoopActive":true,"loopStartTime":0,"loopEndTime":8,"__token":"7","__senderId":"5Aipxv7gPJTN-Z8_AAAN","__senderName":"Alicer-5Aip","__syncMode":"global"},"msg":"action_received"}
{"level":30,"time":1762305094485,"pid":3059021,"hostname":"ubuntu","timestamp":1762305094485,"socketId":"4LF0oJrOV5v0tipSAAAP","action":{"type":"STOP_AUDIO_PLAYBACK","rewind":true,"__token":"4","__senderId":"4LF0oJrOV5v0tipSAAAP","__senderName":"Alicer-4LF0","__syncMode":"global"},"msg":"action_received"}
{"level":30,"time":1762305096017,"pid":3059021,"hostname":"ubuntu","timestamp":1762305096017,"socketId":"4LF0oJrOV5v0tipSAAAP","action":{"type":"START_AUDIO_PLAYBACK","seekTime":0,"loopState":{"isLoopActive":false,"loopStartTime":0,"loopEndTime":8},"__token":"5","__senderId":"4LF0oJrOV5v0tipSAAAP","__senderName":"Alicer-4LF0","scheduleAtServerMs":1762305096205,"__syncMode":"global"},"msg":"action_received"}
{"level":30,"time":1762305096754,"pid":3059021,"hostname":"ubuntu","timestamp":1762305096754,"socketId":"4LF0oJrOV5v0tipSAAAP","action":{"type":"STOP_AUDIO_PLAYBACK","rewind":true,"__token":"6","__senderId":"4LF0oJrOV5v0tipSAAAP","__senderName":"Alicer-4LF0","__syncMode":"global"},"msg":"action_received"}
{"level":30,"time":1762305099250,"pid":3059021,"hostname":"ubuntu","timestamp":1762305099250,"socketId":"4LF0oJrOV5v0tipSAAAP","action":{"type":"SET_SYNC_MODE","mode":"local","__token":"7","__senderId":"4LF0oJrOV5v0tipSAAAP","__senderName":"Alicer-4LF0","__syncMode":"global"},"msg":"action_received"}
{"level":30,"time":1762305105386,"pid":3059021,"hostname":"ubuntu","timestamp":1762305105386,"socketId":"5Aipxv7gPJTN-Z8_AAAN","action":{"type":"SET_LOOP_STATE","isLoopActive":true,"loopStartTime":0,"loopEndTime":8,"__token":"9","__senderId":"5Aipxv7gPJTN-Z8_AAAN","__senderName":"Alicer-5Aip","__syncMode":"global"},"msg":"action_received"}
{"level":30,"time":1762364474581,"pid":3059021,"hostname":"ubuntu","timestamp":1762364474581,"socketId":"aXiMWlUqpNLYd80oAAAh","action":{"type":"AUDIO_SNAPSHOT_REQUEST","__token":"1","__senderId":"aXiMWlUqpNLYd80oAAAh","__senderName":"Alicer-aXiM"},"msg":"action_received"}
{"level":30,"time":1762367359293,"pid":3059021,"hostname":"ubuntu","timestamp":1762367359293,"socketId":"iICsIRAIp2R_6ybWAAAl","action":{"type":"AUDIO_SNAPSHOT_REQUEST","__token":"1","__senderId":"iICsIRAIp2R_6ybWAAAl","__senderName":"Alicer-iICs"},"msg":"action_received"}

View File

@@ -0,0 +1 @@
{"level":30,"time":1762788277978,"pid":2587819,"hostname":"ubuntu","timestamp":1762788277978,"socketId":"pr_0y8m05cqgtEkWAAAZ","action":{"type":"AUDIO_SNAPSHOT_REQUEST","__token":"1","__senderId":"pr_0y8m05cqgtEkWAAAZ","__senderName":"Alicer-pr_0"},"msg":"action_received"}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -518,6 +518,27 @@ io.on("connection", (socket) => {
   });
 });
 
+// --- EXTERNAL NOTIFICATION ENDPOINT ---
+app.post("/notify-update", express.json(), (req, res) => {
+  const { updateType } = req.body;
+
+  if (updateType === "samples") {
+    // 1. Emit the event to ALL rooms/clients.
+    //    It has to be an event that ui.js understands.
+    io.emit("system_update", {
+      type: "RELOAD_SAMPLES",
+      message: "Novo Sample/Project adicionado. Recarregando o navegador de arquivos...",
+    });
+
+    console.log(
+      "[Notificação] Evento 'RELOAD_SAMPLES' emitido para todos os clientes."
+    );
+
+    return res.status(200).send({ success: true, message: "Notificação de Samples/Projetos enviada." });
+  }
+
+  res.status(400).send({ success: false, message: "updateType inválido." });
+});
+
 app.get("/", (req, res) => {
   res.send(
     "Servidor Backend V6 (Logs Dinâmicos) da DAW colaborativa está no ar!"

View File

@@ -165,6 +165,18 @@ socket.on("connect_error", (err) => {
   );
 });
 
+socket.on("system_update", (data) => {
+  if (data.type === "RELOAD_SAMPLES") {
+    console.log(`[System Update] Recebida ordem para recarregar samples: ${data.message}`);
+
+    // Make sure this function exists and does what is expected:
+    // 1. Fetches the fresh manifests (metadata/samples-manifest.json etc.)
+    // 2. Re-renders the sample browser UI
+    loadAndRenderSampleBrowser();
+  }
+  // If there are other notifications, add them here (e.g. RELOAD_PROJECTS)
+});
+
 // -----------------------------------------------------------------------------
 // RECEIVE THE ROOM'S SAVED STATE
 // -----------------------------------------------------------------------------
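`loadAndRenderSampleBrowser()` is assumed to exist by the handler above but is not part of this diff. A minimal sketch of what the two numbered comments describe, with the manifest path and the container id as assumptions:

```js
// Hypothetical sketch of loadAndRenderSampleBrowser(); names are assumptions.
async function loadAndRenderSampleBrowser() {
  // 1. Re-fetch the manifest the Flask watcher regenerates; the timestamp
  //    query parameter defeats the browser cache after a rebuild.
  const res = await fetch(`metadata/samples-manifest.json?t=${Date.now()}`);
  const manifest = await res.json();

  // 2. Rebuild the sample browser from the manifest tree, whose shape is
  //    { folder: { "name.ogg": { _isFile: true }, ... }, ... } (see the
  //    manifest file added later in this commit).
  const container = document.querySelector("#sample-browser"); // assumed id
  container.replaceChildren();
  for (const [folder, entries] of Object.entries(manifest)) {
    const heading = document.createElement("h4");
    heading.textContent = folder;
    container.append(heading);
    for (const [name, meta] of Object.entries(entries)) {
      if (!meta._isFile) continue; // skip nested folders in this sketch
      const item = document.createElement("div");
      item.textContent = name;
      container.append(item);
    }
  }
}
```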

metadata/build-cache.json (new file, 2783 lines)

File diff suppressed because it is too large

View File

@@ -30,9 +30,6 @@
     "rave_bass02.ogg": {
       "_isFile": true
     },
-    "rave_bass03.ogg": {
-      "_isFile": true
-    },
     "rave_bass04.ogg": {
       "_isFile": true
     },
@@ -528,11 +525,7 @@
       "_isFile": true
     }
   },
-  "samples": {
-    "bassdrum_acoustic02_-_Copia.ogg": {
-      "_isFile": true
-    }
-  },
+  "samples": {},
   "shapes": {
     "additive.wav": {
       "_isFile": true

View File

@@ -0,0 +1,773 @@
{
"basses": {
"bass01.ogg": {
"_isFile": true
},
"bass_acid01.ogg": {
"_isFile": true
},
"bass_acid02.ogg": {
"_isFile": true
},
"bass_hard01.ogg": {
"_isFile": true
},
"bass_hard02.ogg": {
"_isFile": true
},
"bass_punch01.ogg": {
"_isFile": true
},
"horror_bass01.ogg": {
"_isFile": true
},
"matrix1000_pluck01.ogg": {
"_isFile": true
},
"rave_bass01.ogg": {
"_isFile": true
},
"rave_bass02.ogg": {
"_isFile": true
},
"rave_bass04.ogg": {
"_isFile": true
},
"synth_acid01.ogg": {
"_isFile": true
},
"synth_acid02.ogg": {
"_isFile": true
},
"techno_synth01.ogg": {
"_isFile": true
}
},
"bassloops": {
"briff01.ogg": {
"_isFile": true
},
"rave_bass01.ogg": {
"_isFile": true
},
"rave_bass02.ogg": {
"_isFile": true
},
"tb303_01.ogg": {
"_isFile": true
},
"techno_bass01.ogg": {
"_isFile": true
},
"techno_bass02.ogg": {
"_isFile": true
},
"techno_synth01.ogg": {
"_isFile": true
},
"techno_synth02.ogg": {
"_isFile": true
},
"techno_synth03.ogg": {
"_isFile": true
},
"techno_synth04.ogg": {
"_isFile": true
}
},
"beats": {
"909beat01.ogg": {
"_isFile": true
},
"break01.ogg": {
"_isFile": true
},
"break02.ogg": {
"_isFile": true
},
"break03.ogg": {
"_isFile": true
},
"electro_beat01.ogg": {
"_isFile": true
},
"electro_beat02.ogg": {
"_isFile": true
},
"house_loop01.ogg": {
"_isFile": true
},
"jungle01.ogg": {
"_isFile": true
},
"rave_hihat01.ogg": {
"_isFile": true
},
"rave_hihat02.ogg": {
"_isFile": true
},
"rave_kick01.ogg": {
"_isFile": true
},
"rave_kick02.ogg": {
"_isFile": true
},
"rave_snare01.ogg": {
"_isFile": true
}
},
"drums": {
"bassdrum01.ogg": {
"_isFile": true
},
"bassdrum02.ogg": {
"_isFile": true
},
"bassdrum03.ogg": {
"_isFile": true
},
"bassdrum04.ogg": {
"_isFile": true
},
"bassdrum_acoustic01.ogg": {
"_isFile": true
},
"bassdrum_acoustic02.ogg": {
"_isFile": true
},
"clap01.ogg": {
"_isFile": true
},
"clap02.ogg": {
"_isFile": true
},
"clap03.ogg": {
"_isFile": true
},
"clap04.ogg": {
"_isFile": true
},
"clav01.ogg": {
"_isFile": true
},
"clav02.ogg": {
"_isFile": true
},
"crash01.ogg": {
"_isFile": true
},
"crash02.ogg": {
"_isFile": true
},
"hihat_closed01.ogg": {
"_isFile": true
},
"hihat_closed02.ogg": {
"_isFile": true
},
"hihat_closed03.ogg": {
"_isFile": true
},
"hihat_closed04.ogg": {
"_isFile": true
},
"hihat_closed05.ogg": {
"_isFile": true
},
"hihat_foot_pedal01.ogg": {
"_isFile": true
},
"hihat_opened01.ogg": {
"_isFile": true
},
"hihat_opened02.ogg": {
"_isFile": true
},
"hihat_opened03.ogg": {
"_isFile": true
},
"kick01.ogg": {
"_isFile": true
},
"kick02.ogg": {
"_isFile": true
},
"kick03.ogg": {
"_isFile": true
},
"kick04.ogg": {
"_isFile": true
},
"kick_distorted01.ogg": {
"_isFile": true
},
"kick_hard01.ogg": {
"_isFile": true
},
"kick_hardcore01.ogg": {
"_isFile": true
},
"kick_hiphop01.ogg": {
"_isFile": true
},
"kick_long01.ogg": {
"_isFile": true
},
"kick_soft01.ogg": {
"_isFile": true
},
"kick_soft02.ogg": {
"_isFile": true
},
"nasty_bass01.ogg": {
"_isFile": true
},
"nasty_rim01.ogg": {
"_isFile": true
},
"nasty_snare01.ogg": {
"_isFile": true
},
"ride01.ogg": {
"_isFile": true
},
"ride02.ogg": {
"_isFile": true
},
"rim01.ogg": {
"_isFile": true
},
"shaker01.ogg": {
"_isFile": true
},
"shaker02.ogg": {
"_isFile": true
},
"shaker03.ogg": {
"_isFile": true
},
"sidestick01.ogg": {
"_isFile": true
},
"snare01.ogg": {
"_isFile": true
},
"snare02.ogg": {
"_isFile": true
},
"snare03.ogg": {
"_isFile": true
},
"snare04.ogg": {
"_isFile": true
},
"snare05.ogg": {
"_isFile": true
},
"snare06.ogg": {
"_isFile": true
},
"snare07.ogg": {
"_isFile": true
},
"snare_acoustic01.ogg": {
"_isFile": true
},
"snare_electro01.ogg": {
"_isFile": true
},
"snare_harsh01.ogg": {
"_isFile": true
},
"snare_hiphop01.ogg": {
"_isFile": true
},
"snare_hiphop02.ogg": {
"_isFile": true
},
"snare_muffled01.ogg": {
"_isFile": true
},
"snare_muffled02.ogg": {
"_isFile": true
},
"snare_rim01.ogg": {
"_isFile": true
},
"snare_short01.ogg": {
"_isFile": true
},
"tom01.ogg": {
"_isFile": true
},
"tom02.ogg": {
"_isFile": true
},
"tom03.ogg": {
"_isFile": true
},
"tom04.ogg": {
"_isFile": true
},
"tom05.ogg": {
"_isFile": true
},
"tom_hi01.ogg": {
"_isFile": true
},
"tom_low01.ogg": {
"_isFile": true
},
"tom_mid01.ogg": {
"_isFile": true
},
"wood01.ogg": {
"_isFile": true
},
"zap01.ogg": {
"_isFile": true
},
"zap02.ogg": {
"_isFile": true
},
"zap03.ogg": {
"_isFile": true
}
},
"drumsynth": {
"acoustic": {},
"cr78": {},
"cr8000": {},
"effects": {},
"electro": {},
"farfisa": {},
"ferraro": {},
"instrument": {},
"jorgensohn": {},
"latin": {},
"linn": {},
"magnetboy": {},
"misc": {},
"misc_bass": {},
"misc_claps": {},
"misc_electro": {},
"misc_fx": {},
"misc_hats": {},
"misc_perc": {},
"misc_synth": {},
"r_n_b": {},
"tr606": {},
"tr77": {},
"tr808": {},
"tr909": {}
},
"effects": {
"chroma_sound_effect01.ogg": {
"_isFile": true
},
"explode01.ogg": {
"_isFile": true
},
"filter_sweep01.ogg": {
"_isFile": true
},
"scratch01.ogg": {
"_isFile": true
},
"start01.ogg": {
"_isFile": true
},
"start02.ogg": {
"_isFile": true
},
"stop01.ogg": {
"_isFile": true
},
"warp01.ogg": {
"_isFile": true
},
"warp02.ogg": {
"_isFile": true
},
"wind_chimes01.ogg": {
"_isFile": true
}
},
"instruments": {
"bassslap01.ogg": {
"_isFile": true
},
"bassslap02.ogg": {
"_isFile": true
},
"cello01.ogg": {
"_isFile": true
},
"church_organ01.ogg": {
"_isFile": true
},
"church_organ02.ogg": {
"_isFile": true
},
"church_organ03.ogg": {
"_isFile": true
},
"church_organ04.ogg": {
"_isFile": true
},
"e_organ01.ogg": {
"_isFile": true
},
"e_piano_accord01.ogg": {
"_isFile": true
},
"e_piano_accord02.ogg": {
"_isFile": true
},
"flute01.ogg": {
"_isFile": true
},
"harpsichord01.ogg": {
"_isFile": true
},
"piano01.ogg": {
"_isFile": true
},
"piano02.ogg": {
"_isFile": true
},
"steel_guitar01.ogg": {
"_isFile": true
},
"steel_guitar_heavy_distorted01.ogg": {
"_isFile": true
},
"steel_guitar_medium_distorted01.ogg": {
"_isFile": true
},
"steel_guitar_slight_distorted01.ogg": {
"_isFile": true
},
"trumpet01.ogg": {
"_isFile": true
},
"violin_double_stop01.ogg": {
"_isFile": true
},
"violin_fingered01.ogg": {
"_isFile": true
},
"violin_pizzicato01.ogg": {
"_isFile": true
}
},
"latin": {
"latin_brass01.ogg": {
"_isFile": true
},
"latin_guitar01.ogg": {
"_isFile": true
},
"latin_guitar02.ogg": {
"_isFile": true
},
"latin_guitar03.ogg": {
"_isFile": true
}
},
"misc": {
"applause01.ogg": {
"_isFile": true
},
"bass!.ogg": {
"_isFile": true
},
"breath01.ogg": {
"_isFile": true
},
"buzz!.ogg": {
"_isFile": true
},
"dong01.ogg": {
"_isFile": true
},
"dong02.ogg": {
"_isFile": true
},
"dong03.ogg": {
"_isFile": true
},
"electric_ping01.ogg": {
"_isFile": true
},
"hit01.ogg": {
"_isFile": true
},
"hit02.ogg": {
"_isFile": true
},
"metalish_dong01.ogg": {
"_isFile": true
},
"metronome01.ogg": {
"_isFile": true
},
"metronome02.ogg": {
"_isFile": true
},
"raving_crowd01.ogg": {
"_isFile": true
},
"snaph01.ogg": {
"_isFile": true
},
"undefined01.ogg": {
"_isFile": true
}
},
"samples": {
"reuniao_funkwhale.wav": {
"_isFile": true
}
},
"shapes": {
"additive.wav": {
"_isFile": true
},
"additive2.wav": {
"_isFile": true
},
"bunglist.wav": {
"_isFile": true
},
"bunglist2.wav": {
"_isFile": true
},
"bunglist3.wav": {
"_isFile": true
},
"bunglist4.wav": {
"_isFile": true
},
"bunglist5.wav": {
"_isFile": true
},
"bunglist6.wav": {
"_isFile": true
},
"bunglist7.wav": {
"_isFile": true
},
"bunglist8.wav": {
"_isFile": true
},
"bunglist9.wav": {
"_isFile": true
},
"harmonics.wav": {
"_isFile": true
},
"low_sine.wav": {
"_isFile": true
},
"micro.wav": {
"_isFile": true
},
"moog.wav": {
"_isFile": true
},
"roundbass.wav": {
"_isFile": true
},
"roundsaw.ogg": {
"_isFile": true
},
"saw+sine.wav": {
"_isFile": true
},
"sine_dist.ogg": {
"_isFile": true
},
"smooth_inv_saw.ogg": {
"_isFile": true
},
"smooth_inv_saw2.ogg": {
"_isFile": true
},
"smooth_inv_saw_dist.ogg": {
"_isFile": true
},
"technobass.wav": {
"_isFile": true
},
"technosynth1.wav": {
"_isFile": true
},
"technosynth2.wav": {
"_isFile": true
},
"technosynth3.wav": {
"_isFile": true
},
"technosynth4.wav": {
"_isFile": true
},
"technosynth5.wav": {
"_isFile": true
},
"technosynth6.wav": {
"_isFile": true
},
"vowel_a.wav": {
"_isFile": true
},
"vowel_u.wav": {
"_isFile": true
}
},
"stringsnpads": {
"bell_choir01.ogg": {
"_isFile": true
},
"bell_choir02.ogg": {
"_isFile": true
},
"chorus01.ogg": {
"_isFile": true
},
"chorus02.ogg": {
"_isFile": true
},
"heaven_strings01.ogg": {
"_isFile": true
},
"juno_pad01.ogg": {
"_isFile": true
},
"korg_poly6_drone01.ogg": {
"_isFile": true
},
"nord_ambient01.ogg": {
"_isFile": true
},
"orion_string01.ogg": {
"_isFile": true
},
"rave_choir01.ogg": {
"_isFile": true
},
"space_strings01.ogg": {
"_isFile": true
},
"space_strings02.ogg": {
"_isFile": true
},
"space_strings03.ogg": {
"_isFile": true
},
"strings01.ogg": {
"_isFile": true
}
},
"waveforms": {
"10saw.flac": {
"_isFile": true
},
"10sine.flac": {
"_isFile": true
},
"10sqr.flac": {
"_isFile": true
},
"10tri.flac": {
"_isFile": true
},
"analogsqr.flac": {
"_isFile": true
},
"fmsaw1.flac": {
"_isFile": true
},
"fmsine.flac": {
"_isFile": true
},
"fmsine2.flac": {
"_isFile": true
},
"halfsine.flac": {
"_isFile": true
},
"impulse.flac": {
"_isFile": true
},
"lfo_trancegate_quarter.flac": {
"_isFile": true
},
"lfo_trancegate_sawdecay_quarter.flac": {
"_isFile": true
},
"lfo_trancegate_sinedecay_quarter.flac": {
"_isFile": true
},
"lfo_trancegate_triplet_half.flac": {
"_isFile": true
},
"lfo_trancegate_triplet_half_2.flac": {
"_isFile": true
},
"lfo_trancegate_triplet_quarter.flac": {
"_isFile": true
},
"lfo_trancegate_whole.flac": {
"_isFile": true
},
"lfo_trancegate_whole_2.flac": {
"_isFile": true
},
"modsqr.flac": {
"_isFile": true
},
"saw1.flac": {
"_isFile": true
},
"saw2.flac": {
"_isFile": true
},
"sinesaw.flac": {
"_isFile": true
},
"w2_addsyn.flac": {
"_isFile": true
},
"w2_angrysaw.flac": {
"_isFile": true
},
"w2_hexagon.flac": {
"_isFile": true
},
"w2_invsine.flac": {
"_isFile": true
},
"w2_invsineabs.flac": {
"_isFile": true
},
"w2_invsinehalf.flac": {
"_isFile": true
},
"w2_noisy.flac": {
"_isFile": true
},
"w2_rad.flac": {
"_isFile": true
},
"w2_sawsine.flac": {
"_isFile": true
},
"w2_sharp.flac": {
"_isFile": true
},
"w2_w-wave.flac": {
"_isFile": true
}
}
}

Binary file not shown.

View File

@@ -8,6 +8,9 @@ from werkzeug.utils import secure_filename
 from flask_cors import CORS
 from watchdog.observers import Observer
 from watchdog.events import FileSystemEventHandler
+import requests
+
+NODE_SERVER_URL = "https://127.0.0.1:33001"
 
 # --- Configuration (unchanged) ---
 PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
@@ -27,13 +30,16 @@ CONFIGS = [
     }
 ]
 
-UPLOAD_FOLDER = os.path.join(PROJECT_ROOT, "src", "samples", "samples")
-WATCH_FOLDER = os.path.join(PROJECT_ROOT, "src", "samples")  # Folder to watch
+UPLOAD_FOLDER_SAMPLE = os.path.join(PROJECT_ROOT, "src", "samples", "samples")
+WATCH_FOLDER_SAMPLE = UPLOAD_FOLDER_SAMPLE  # Folder to watch
+UPLOAD_FOLDER_PROJECT = os.path.join(PROJECT_ROOT, "src", "samples", "projects")
+WATCH_FOLDER_PROJECT = os.path.join(PROJECT_ROOT, "src", "upload_projects")  # Folder to watch
 
 ALLOWED_EXTENSIONS = {'wav', 'ogg', 'flac', 'mp3'}
 
 app = Flask(__name__)
-CORS(app, origins=["https://alice.ufsj.edu.br", "http://localhost:8000", "http://127.0.0.1:5500"])
-app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+CORS(app, origins=["https://alice.ufsj.edu.br", "https://alice.ufsj.edu.br:33002"])
+app.config['UPLOAD_FOLDER_SAMPLE'] = UPLOAD_FOLDER_SAMPLE
 
 # --- Manifest generator functions (unchanged) ---
 def scan_directory_tree(path):
@@ -75,9 +81,27 @@ def run_jekyll_build():
     except Exception as e:
         print(f"ERRO inesperado durante o Jekyll build: {e}")
 
+def notify_node_server(update_type):
+    """Sends an HTTP notification to the Node.js server."""
+    try:
+        url = f"{NODE_SERVER_URL}/notify-update"
+        headers = {'Content-Type': 'application/json'}
+        payload = {"updateType": update_type}
+
+        # 'verify=False' is generally needed when calling 127.0.0.1 with
+        # Let's Encrypt (or self-signed) certificates, to avoid SSL errors.
+        response = requests.post(url, json=payload, headers=headers, verify=False)
+
+        if response.status_code == 200:
+            print(f"[Notificação] Sucesso ao notificar Node.js: {update_type}")
+        else:
+            print(f"[Notificação] ERRO ao notificar Node.js ({response.status_code}): {response.text}")
+    except Exception as e:
+        print(f"[Notificação] ERRO de conexão com o Node.js em {NODE_SERVER_URL}: {e}")
+
 def generate_manifests():
     print("\nIniciando geração de arquivos de manifesto...")
-    # ... (internal logic unchanged) ...
     for config in CONFIGS:
         source_dir_abs = os.path.join(PROJECT_ROOT, config["source_dir"])
         output_file_abs = os.path.join(PROJECT_ROOT, config["output_file"])
@@ -93,8 +117,10 @@ def generate_manifests():
             print(f"SUCESSO: Arquivo '{output_file_abs}' gerado!")
     print("\nGeração de manifestos concluída.")
     run_jekyll_build()
+    # 2. NOTIFY THE NODE.JS SERVER (NEW STEP)
+    notify_node_server("samples")
 
-# --- Flask server logic (unchanged) ---
+# --- Flask server ---
 def allowed_file(filename):
     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
@@ -105,41 +131,58 @@ def upload_file():
     if file.filename == '': return jsonify({"error": "Nenhum arquivo selecionado"}), 400
     if file and allowed_file(file.filename):
         filename = secure_filename(file.filename)
-        os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
-        save_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
+        os.makedirs(app.config['UPLOAD_FOLDER_SAMPLE'], exist_ok=True)
+        save_path = os.path.join(app.config['UPLOAD_FOLDER_SAMPLE'], filename)
         try:
             file.save(save_path)
-            generate_manifests()
-            return jsonify({"success": True, "message": f"Arquivo '{filename}' salvo e site atualizado!"}), 200
+            return jsonify({"success": True, "message": f"Arquivo '{filename}' salvo!"}), 200
         except Exception as e:
             return jsonify({"error": str(e)}), 500
     return jsonify({"error": "Tipo de arquivo não permitido"}), 400
 
-# --- NEW: FILE WATCHER LOGIC (WATCHDOG) ---
+# --- FILE WATCHER LOGIC (WATCHDOG) ---
 class ManifestEventHandler(FileSystemEventHandler):
     """An event handler that regenerates the manifests when a file changes."""
     def __init__(self):
+        # Initialize the timestamp of the last run
         self.last_triggered = 0
+        # Minimum interval between runs (5 seconds)
+        self.debounce_interval = 5
 
     def on_any_event(self, event):
-        # Ignore directory events
-        if event.is_directory:
+        # Ignore directory events and the output manifest file (avoids a loop)
+        if event.is_directory or "manifest" in event.src_path:
             return
-        # Simple mechanism to avoid several runs in a row (debounce)
         current_time = time.time()
-        if current_time - self.last_triggered > 5:  # Wait 5 seconds between updates
+        # Check whether enough time has passed since the last trigger
+        if current_time - self.last_triggered > self.debounce_interval:
             print(f"\n[VIGIA] Mudança detectada: {event.src_path}")
-            generate_manifests()
+            # Run the manifest generation (and the Jekyll build, if enabled).
+            # It is crucial that generate_manifests() is fast or runs in a
+            # separate thread, so it does not block the watchdog for long.
+            Thread(target=generate_manifests).start()
             self.last_triggered = current_time
+        else:
+            print(f"[VIGIA] Mudança ignorada devido ao debounce (Intervalo de {self.debounce_interval}s).")
 
 def start_file_watcher():
-    """Starts the file observer in a separate thread."""
-    print(f"\n[VIGIA] Iniciando monitoramento da pasta: {WATCH_FOLDER}")
+    """Starts the file observer for both folders in a separate thread."""
     event_handler = ManifestEventHandler()
     observer = Observer()
-    observer.schedule(event_handler, WATCH_FOLDER, recursive=True)
+
+    # 1. Watch the samples folder
+    print(f"\n[VIGIA] Monitorando Samples: {WATCH_FOLDER_SAMPLE}")
+    observer.schedule(event_handler, WATCH_FOLDER_SAMPLE, recursive=True)
+
+    # 2. Watch the projects folder
+    print(f"[VIGIA] Monitorando Projetos: {WATCH_FOLDER_PROJECT}")
+    observer.schedule(event_handler, WATCH_FOLDER_PROJECT, recursive=True)
+
     observer.start()
     try:
         while True:
@@ -160,4 +203,4 @@ if __name__ == '__main__':
     # Start the Flask server (main thread)
     print("\n[FLASK] Iniciando servidor de upload...")
-    app.run(host='0.0.0.0', port=5000, debug=False)  # Debug mode must be False to avoid running twice
+    app.run(host='0.0.0.0', port=33002, debug=True)  # Debug mode must be False to avoid running twice
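With the server now on port 33002 and the CORS allow-list updated, the browser-side upload call would look roughly like this sketch; the `/upload` route path and the `file` field name are assumptions, since the route decorator sits outside the hunks shown above:

```js
// Hedged sketch of the upload call from the site (browser ESM, top-level await).
// Assumed: an <input type="file" id="sample-upload"> element, a '/upload'
// route, and a 'file' form field; the origin matches app.run(..., port=33002).
const input = document.querySelector("#sample-upload"); // assumed element id
const form = new FormData();
form.append("file", input.files[0]);

const res = await fetch("https://alice.ufsj.edu.br:33002/upload", {
  method: "POST",
  body: form, // the browser sets the multipart boundary automatically
});
console.log(await res.json()); // e.g. { success: true, message: "Arquivo '...' salvo!" }
```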

venv/bin/normalizer (new executable file, 8 lines)
View File

@@ -0,0 +1,8 @@
#!/nethome/jotachina/projetos/mmpSearch/venv/bin/python3
# -*- coding: utf-8 -*-
import re
import sys
from charset_normalizer.cli import cli_detect
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(cli_detect())

View File

@@ -0,0 +1,164 @@
/* -*- indent-tabs-mode: nil; tab-width: 4; -*- */
/* Greenlet object interface */
#ifndef Py_GREENLETOBJECT_H
#define Py_GREENLETOBJECT_H
#include <Python.h>
#ifdef __cplusplus
extern "C" {
#endif
/* This is deprecated and undocumented. It does not change. */
#define GREENLET_VERSION "1.0.0"
#ifndef GREENLET_MODULE
#define implementation_ptr_t void*
#endif
typedef struct _greenlet {
PyObject_HEAD
PyObject* weakreflist;
PyObject* dict;
implementation_ptr_t pimpl;
} PyGreenlet;
#define PyGreenlet_Check(op) (op && PyObject_TypeCheck(op, &PyGreenlet_Type))
/* C API functions */
/* Total number of symbols that are exported */
#define PyGreenlet_API_pointers 12
#define PyGreenlet_Type_NUM 0
#define PyExc_GreenletError_NUM 1
#define PyExc_GreenletExit_NUM 2
#define PyGreenlet_New_NUM 3
#define PyGreenlet_GetCurrent_NUM 4
#define PyGreenlet_Throw_NUM 5
#define PyGreenlet_Switch_NUM 6
#define PyGreenlet_SetParent_NUM 7
#define PyGreenlet_MAIN_NUM 8
#define PyGreenlet_STARTED_NUM 9
#define PyGreenlet_ACTIVE_NUM 10
#define PyGreenlet_GET_PARENT_NUM 11
#ifndef GREENLET_MODULE
/* This section is used by modules that uses the greenlet C API */
static void** _PyGreenlet_API = NULL;
# define PyGreenlet_Type \
(*(PyTypeObject*)_PyGreenlet_API[PyGreenlet_Type_NUM])
# define PyExc_GreenletError \
((PyObject*)_PyGreenlet_API[PyExc_GreenletError_NUM])
# define PyExc_GreenletExit \
((PyObject*)_PyGreenlet_API[PyExc_GreenletExit_NUM])
/*
* PyGreenlet_New(PyObject *args)
*
* greenlet.greenlet(run, parent=None)
*/
# define PyGreenlet_New \
(*(PyGreenlet * (*)(PyObject * run, PyGreenlet * parent)) \
_PyGreenlet_API[PyGreenlet_New_NUM])
/*
* PyGreenlet_GetCurrent(void)
*
* greenlet.getcurrent()
*/
# define PyGreenlet_GetCurrent \
(*(PyGreenlet * (*)(void)) _PyGreenlet_API[PyGreenlet_GetCurrent_NUM])
/*
* PyGreenlet_Throw(
* PyGreenlet *greenlet,
* PyObject *typ,
* PyObject *val,
* PyObject *tb)
*
* g.throw(...)
*/
# define PyGreenlet_Throw \
(*(PyObject * (*)(PyGreenlet * self, \
PyObject * typ, \
PyObject * val, \
PyObject * tb)) \
_PyGreenlet_API[PyGreenlet_Throw_NUM])
/*
* PyGreenlet_Switch(PyGreenlet *greenlet, PyObject *args)
*
* g.switch(*args, **kwargs)
*/
# define PyGreenlet_Switch \
(*(PyObject * \
(*)(PyGreenlet * greenlet, PyObject * args, PyObject * kwargs)) \
_PyGreenlet_API[PyGreenlet_Switch_NUM])
/*
* PyGreenlet_SetParent(PyObject *greenlet, PyObject *new_parent)
*
* g.parent = new_parent
*/
# define PyGreenlet_SetParent \
(*(int (*)(PyGreenlet * greenlet, PyGreenlet * nparent)) \
_PyGreenlet_API[PyGreenlet_SetParent_NUM])
/*
* PyGreenlet_GetParent(PyObject* greenlet)
*
* return greenlet.parent;
*
* This could return NULL even if there is no exception active.
* If it does not return NULL, you are responsible for decrementing the
* reference count.
*/
# define PyGreenlet_GetParent \
(*(PyGreenlet* (*)(PyGreenlet*)) \
_PyGreenlet_API[PyGreenlet_GET_PARENT_NUM])
/*
* deprecated, undocumented alias.
*/
# define PyGreenlet_GET_PARENT PyGreenlet_GetParent
# define PyGreenlet_MAIN \
(*(int (*)(PyGreenlet*)) \
_PyGreenlet_API[PyGreenlet_MAIN_NUM])
# define PyGreenlet_STARTED \
(*(int (*)(PyGreenlet*)) \
_PyGreenlet_API[PyGreenlet_STARTED_NUM])
# define PyGreenlet_ACTIVE \
(*(int (*)(PyGreenlet*)) \
_PyGreenlet_API[PyGreenlet_ACTIVE_NUM])
/* Macro that imports greenlet and initializes C API */
/* NOTE: This has actually moved to ``greenlet._greenlet._C_API``, but we
keep the older definition to be sure older code that might have a copy of
the header still works. */
# define PyGreenlet_Import() \
{ \
_PyGreenlet_API = (void**)PyCapsule_Import("greenlet._C_API", 0); \
}
#endif /* GREENLET_MODULE */
#ifdef __cplusplus
}
#endif
#endif /* !Py_GREENLETOBJECT_H */

View File

@@ -0,0 +1,20 @@
The MIT License (MIT)
Copyright (c) 2014 Miguel Grinberg
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@@ -0,0 +1,76 @@
Metadata-Version: 2.1
Name: Flask-SocketIO
Version: 5.5.1
Summary: Socket.IO integration for Flask applications
Author-email: Miguel Grinberg <miguel.grinberg@gmail.com>
Project-URL: Homepage, https://github.com/miguelgrinberg/flask-socketio
Project-URL: Bug Tracker, https://github.com/miguelgrinberg/flask-socketio/issues
Classifier: Environment :: Web Environment
Classifier: Intended Audience :: Developers
Classifier: Programming Language :: Python :: 3
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Requires-Python: >=3.6
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: Flask>=0.9
Requires-Dist: python-socketio>=5.12.0
Provides-Extra: docs
Requires-Dist: sphinx; extra == "docs"
Flask-SocketIO
==============
[![Build status](https://github.com/miguelgrinberg/flask-socketio/workflows/build/badge.svg)](https://github.com/miguelgrinberg/Flask-SocketIO/actions) [![codecov](https://codecov.io/gh/miguelgrinberg/flask-socketio/branch/main/graph/badge.svg)](https://codecov.io/gh/miguelgrinberg/flask-socketio)
Socket.IO integration for Flask applications.
Sponsors
--------
The following organizations are funding this project:
![Socket.IO](https://images.opencollective.com/socketio/050e5eb/logo/64.png)<br>[Socket.IO](https://socket.io) | [Add your company here!](https://github.com/sponsors/miguelgrinberg)|
-|-
Many individual sponsors also support this project through small ongoing contributions. Why not [join them](https://github.com/sponsors/miguelgrinberg)?
Installation
------------
You can install this package as usual with pip:
pip install flask-socketio
Example
-------
```py
from flask import Flask, render_template
from flask_socketio import SocketIO, emit
app = Flask(__name__)
app.config['SECRET_KEY'] = 'secret!'
socketio = SocketIO(app)
@app.route('/')
def index():
return render_template('index.html')
@socketio.event
def my_event(message):
emit('my response', {'data': 'got it!'})
if __name__ == '__main__':
socketio.run(app)
```
Resources
---------
- [Tutorial](http://blog.miguelgrinberg.com/post/easy-websockets-with-flask-and-gevent)
- [Documentation](http://flask-socketio.readthedocs.io/en/latest/)
- [PyPI](https://pypi.python.org/pypi/Flask-SocketIO)
- [Change Log](https://github.com/miguelgrinberg/Flask-SocketIO/blob/main/CHANGES.md)
- Questions? See the [questions](https://stackoverflow.com/questions/tagged/flask-socketio) others have asked on Stack Overflow, or [ask](https://stackoverflow.com/questions/ask?tags=python+flask-socketio+python-socketio) your own question.

View File

@@ -0,0 +1,13 @@
Flask_SocketIO-5.5.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
Flask_SocketIO-5.5.1.dist-info/LICENSE,sha256=aNCWbkgKjS_T1cJtACyZbvCM36KxWnfQ0LWTuavuYKQ,1082
Flask_SocketIO-5.5.1.dist-info/METADATA,sha256=7YA8ZKizrtJiaCqqdDiTU6t1xWWdTmNw3CqBxSMcW3k,2635
Flask_SocketIO-5.5.1.dist-info/RECORD,,
Flask_SocketIO-5.5.1.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
Flask_SocketIO-5.5.1.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
Flask_SocketIO-5.5.1.dist-info/top_level.txt,sha256=C1ugzQBJ3HHUJsWGzyt70XRVOX-y4CUAR8MWKjwJOQ8,15
flask_socketio/__init__.py,sha256=5hN0LE0hfGMUDcX4FheZrtXERJ1IBEPagv0pgeqdtlU,54904
flask_socketio/__pycache__/__init__.cpython-312.pyc,,
flask_socketio/__pycache__/namespace.cpython-312.pyc,,
flask_socketio/__pycache__/test_client.cpython-312.pyc,,
flask_socketio/namespace.py,sha256=UkVryJvFYgnCMKWSF35GVfGdyh2cXRDyRbfmEPPchVA,2329
flask_socketio/test_client.py,sha256=rClk02TSRqgidH8IyeohspKVKdpRx7gcZBjg1YUtZpA,11026

View File

@@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: setuptools (75.7.0)
Root-Is-Purelib: true
Tag: py3-none-any

View File

@@ -0,0 +1 @@
flask_socketio

View File

@@ -0,0 +1,376 @@
Mozilla Public License Version 2.0
==================================
Copyright 2009-2024 Joshua Bronson. All rights reserved.
1. Definitions
--------------
1.1. "Contributor"
means each individual or legal entity that creates, contributes to
the creation of, or owns Covered Software.
1.2. "Contributor Version"
means the combination of the Contributions of others (if any) used
by a Contributor and that particular Contributor's Contribution.
1.3. "Contribution"
means Covered Software of a particular Contributor.
1.4. "Covered Software"
means Source Code Form to which the initial Contributor has attached
the notice in Exhibit A, the Executable Form of such Source Code
Form, and Modifications of such Source Code Form, in each case
including portions thereof.
1.5. "Incompatible With Secondary Licenses"
means
(a) that the initial Contributor has attached the notice described
in Exhibit B to the Covered Software; or
(b) that the Covered Software was made available under the terms of
version 1.1 or earlier of the License, but not also under the
terms of a Secondary License.
1.6. "Executable Form"
means any form of the work other than Source Code Form.
1.7. "Larger Work"
means a work that combines Covered Software with other material, in
a separate file or files, that is not Covered Software.
1.8. "License"
means this document.
1.9. "Licensable"
means having the right to grant, to the maximum extent possible,
whether at the time of the initial grant or subsequently, any and
all of the rights conveyed by this License.
1.10. "Modifications"
means any of the following:
(a) any file in Source Code Form that results from an addition to,
deletion from, or modification of the contents of Covered
Software; or
(b) any new file in Source Code Form that contains any Covered
Software.
1.11. "Patent Claims" of a Contributor
means any patent claim(s), including without limitation, method,
process, and apparatus claims, in any patent Licensable by such
Contributor that would be infringed, but for the grant of the
License, by the making, using, selling, offering for sale, having
made, import, or transfer of either its Contributions or its
Contributor Version.
1.12. "Secondary License"
means either the GNU General Public License, Version 2.0, the GNU
Lesser General Public License, Version 2.1, the GNU Affero General
Public License, Version 3.0, or any later versions of those
licenses.
1.13. "Source Code Form"
means the form of the work preferred for making modifications.
1.14. "You" (or "Your")
means an individual or a legal entity exercising rights under this
License. For legal entities, "You" includes any entity that
controls, is controlled by, or is under common control with You. For
purposes of this definition, "control" means (a) the power, direct
or indirect, to cause the direction or management of such entity,
whether by contract or otherwise, or (b) ownership of more than
fifty percent (50%) of the outstanding shares or beneficial
ownership of such entity.
2. License Grants and Conditions
--------------------------------
2.1. Grants
Each Contributor hereby grants You a world-wide, royalty-free,
non-exclusive license:
(a) under intellectual property rights (other than patent or trademark)
Licensable by such Contributor to use, reproduce, make available,
modify, display, perform, distribute, and otherwise exploit its
Contributions, either on an unmodified basis, with Modifications, or
as part of a Larger Work; and
(b) under Patent Claims of such Contributor to make, use, sell, offer
for sale, have made, import, and otherwise transfer either its
Contributions or its Contributor Version.
2.2. Effective Date
The licenses granted in Section 2.1 with respect to any Contribution
become effective for each Contribution on the date the Contributor first
distributes such Contribution.
2.3. Limitations on Grant Scope
The licenses granted in this Section 2 are the only rights granted under
this License. No additional rights or licenses will be implied from the
distribution or licensing of Covered Software under this License.
Notwithstanding Section 2.1(b) above, no patent license is granted by a
Contributor:
(a) for any code that a Contributor has removed from Covered Software;
or
(b) for infringements caused by: (i) Your and any other third party's
modifications of Covered Software, or (ii) the combination of its
Contributions with other software (except as part of its Contributor
Version); or
(c) under Patent Claims infringed by Covered Software in the absence of
its Contributions.
This License does not grant any rights in the trademarks, service marks,
or logos of any Contributor (except as may be necessary to comply with
the notice requirements in Section 3.4).
2.4. Subsequent Licenses
No Contributor makes additional grants as a result of Your choice to
distribute the Covered Software under a subsequent version of this
License (see Section 10.2) or under the terms of a Secondary License (if
permitted under the terms of Section 3.3).
2.5. Representation
Each Contributor represents that the Contributor believes its
Contributions are its original creation(s) or it has sufficient rights
to grant the rights to its Contributions conveyed by this License.
2.6. Fair Use
This License is not intended to limit any rights You have under
applicable copyright doctrines of fair use, fair dealing, or other
equivalents.
2.7. Conditions
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
in Section 2.1.
3. Responsibilities
-------------------
3.1. Distribution of Source Form
All distribution of Covered Software in Source Code Form, including any
Modifications that You create or to which You contribute, must be under
the terms of this License. You must inform recipients that the Source
Code Form of the Covered Software is governed by the terms of this
License, and how they can obtain a copy of this License. You may not
attempt to alter or restrict the recipients' rights in the Source Code
Form.
3.2. Distribution of Executable Form
If You distribute Covered Software in Executable Form then:
(a) such Covered Software must also be made available in Source Code
Form, as described in Section 3.1, and You must inform recipients of
the Executable Form how they can obtain a copy of such Source Code
Form by reasonable means in a timely manner, at a charge no more
than the cost of distribution to the recipient; and
(b) You may distribute such Executable Form under the terms of this
License, or sublicense it under different terms, provided that the
license for the Executable Form does not attempt to limit or alter
the recipients' rights in the Source Code Form under this License.
3.3. Distribution of a Larger Work
You may create and distribute a Larger Work under terms of Your choice,
provided that You also comply with the requirements of this License for
the Covered Software. If the Larger Work is a combination of Covered
Software with a work governed by one or more Secondary Licenses, and the
Covered Software is not Incompatible With Secondary Licenses, this
License permits You to additionally distribute such Covered Software
under the terms of such Secondary License(s), so that the recipient of
the Larger Work may, at their option, further distribute the Covered
Software under the terms of either this License or such Secondary
License(s).
3.4. Notices
You may not remove or alter the substance of any license notices
(including copyright notices, patent notices, disclaimers of warranty,
or limitations of liability) contained within the Source Code Form of
the Covered Software, except that You may alter any license notices to
the extent required to remedy known factual inaccuracies.
3.5. Application of Additional Terms
You may choose to offer, and to charge a fee for, warranty, support,
indemnity or liability obligations to one or more recipients of Covered
Software. However, You may do so only on Your own behalf, and not on
behalf of any Contributor. You must make it absolutely clear that any
such warranty, support, indemnity, or liability obligation is offered by
You alone, and You hereby agree to indemnify every Contributor for any
liability incurred by such Contributor as a result of warranty, support,
indemnity or liability terms You offer. You may include additional
disclaimers of warranty and limitations of liability specific to any
jurisdiction.
4. Inability to Comply Due to Statute or Regulation
---------------------------------------------------
If it is impossible for You to comply with any of the terms of this
License with respect to some or all of the Covered Software due to
statute, judicial order, or regulation then You must: (a) comply with
the terms of this License to the maximum extent possible; and (b)
describe the limitations and the code they affect. Such description must
be placed in a text file included with all distributions of the Covered
Software under this License. Except to the extent prohibited by statute
or regulation, such description must be sufficiently detailed for a
recipient of ordinary skill to be able to understand it.
5. Termination
--------------
5.1. The rights granted under this License will terminate automatically
if You fail to comply with any of its terms. However, if You become
compliant, then the rights granted under this License from a particular
Contributor are reinstated (a) provisionally, unless and until such
Contributor explicitly and finally terminates Your grants, and (b) on an
ongoing basis, if such Contributor fails to notify You of the
non-compliance by some reasonable means prior to 60 days after You have
come back into compliance. Moreover, Your grants from a particular
Contributor are reinstated on an ongoing basis if such Contributor
notifies You of the non-compliance by some reasonable means, this is the
first time You have received notice of non-compliance with this License
from such Contributor, and You become compliant prior to 30 days after
Your receipt of the notice.
5.2. If You initiate litigation against any entity by asserting a patent
infringement claim (excluding declaratory judgment actions,
counter-claims, and cross-claims) alleging that a Contributor Version
directly or indirectly infringes any patent, then the rights granted to
You by any and all Contributors for the Covered Software under Section
2.1 of this License shall terminate.
5.3. In the event of termination under Sections 5.1 or 5.2 above, all
end user license agreements (excluding distributors and resellers) which
have been validly granted by You or Your distributors under this License
prior to termination shall survive termination.
************************************************************************
* *
* 6. Disclaimer of Warranty *
* ------------------------- *
* *
* Covered Software is provided under this License on an "as is" *
* basis, without warranty of any kind, either expressed, implied, or *
* statutory, including, without limitation, warranties that the *
* Covered Software is free of defects, merchantable, fit for a *
* particular purpose or non-infringing. The entire risk as to the *
* quality and performance of the Covered Software is with You. *
* Should any Covered Software prove defective in any respect, You *
* (not any Contributor) assume the cost of any necessary servicing, *
* repair, or correction. This disclaimer of warranty constitutes an *
* essential part of this License. No use of any Covered Software is *
* authorized under this License except under this disclaimer. *
* *
************************************************************************
************************************************************************
* *
* 7. Limitation of Liability *
* -------------------------- *
* *
* Under no circumstances and under no legal theory, whether tort *
* (including negligence), contract, or otherwise, shall any *
* Contributor, or anyone who distributes Covered Software as *
* permitted above, be liable to You for any direct, indirect, *
* special, incidental, or consequential damages of any character *
* including, without limitation, damages for lost profits, loss of *
* goodwill, work stoppage, computer failure or malfunction, or any *
* and all other commercial damages or losses, even if such party *
* shall have been informed of the possibility of such damages. This *
* limitation of liability shall not apply to liability for death or *
* personal injury resulting from such party's negligence to the *
* extent applicable law prohibits such limitation. Some *
* jurisdictions do not allow the exclusion or limitation of *
* incidental or consequential damages, so this exclusion and *
* limitation may not apply to You. *
* *
************************************************************************
8. Litigation
-------------
Any litigation relating to this License may be brought only in the
courts of a jurisdiction where the defendant maintains its principal
place of business and such litigation shall be governed by laws of that
jurisdiction, without reference to its conflict-of-law provisions.
Nothing in this Section shall prevent a party's ability to bring
cross-claims or counter-claims.
9. Miscellaneous
----------------
This License represents the complete agreement concerning the subject
matter hereof. If any provision of this License is held to be
unenforceable, such provision shall be reformed only to the extent
necessary to make it enforceable. Any law or regulation which provides
that the language of a contract shall be construed against the drafter
shall not be used to construe this License against a Contributor.
10. Versions of the License
---------------------------
10.1. New Versions
Mozilla Foundation is the license steward. Except as provided in Section
10.3, no one other than the license steward has the right to modify or
publish new versions of this License. Each version will be given a
distinguishing version number.
10.2. Effect of New Versions
You may distribute the Covered Software under the terms of the version
of the License under which You originally received the Covered Software,
or under the terms of any subsequent version published by the license
steward.
10.3. Modified Versions
If you create software not governed by this License, and you want to
create a new license for such software, you may create and use a
modified version of this License if you rename the license and remove
any references to the name of the license steward (except to note that
such modified license differs from this License).
10.4. Distributing Source Code Form that is Incompatible With Secondary
Licenses
If You choose to distribute Source Code Form that is Incompatible With
Secondary Licenses under the terms of this version of the License, the
notice described in Exhibit B of this License must be attached.
Exhibit A - Source Code Form License Notice
-------------------------------------------
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.
If it is not possible or desirable to put the notice in a particular
file, then You may include the notice in a location (such as a LICENSE
file in a relevant directory) where a recipient would be likely to look
for such a notice.
You may add additional accurate notices of copyright ownership.
Exhibit B - "Incompatible With Secondary Licenses" Notice
---------------------------------------------------------
This Source Code Form is "Incompatible With Secondary Licenses", as
defined by the Mozilla Public License, v. 2.0.

View File

@@ -0,0 +1,260 @@
Metadata-Version: 2.1
Name: bidict
Version: 0.23.1
Summary: The bidirectional mapping library for Python.
Author-email: Joshua Bronson <jabronson@gmail.com>
License: MPL 2.0
Project-URL: Changelog, https://bidict.readthedocs.io/changelog.html
Project-URL: Documentation, https://bidict.readthedocs.io
Project-URL: Funding, https://bidict.readthedocs.io/#sponsoring
Project-URL: Repository, https://github.com/jab/bidict
Keywords: bidict,bimap,bidirectional,dict,dictionary,mapping,collections
Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Typing :: Typed
Requires-Python: >=3.8
Description-Content-Type: text/x-rst
License-File: LICENSE
.. role:: doc
.. (Forward declaration for the "doc" role that Sphinx defines for interop with renderers that
are often used to show this doc and that are unaware of Sphinx (GitHub.com, PyPI.org, etc.).
Use :doc: rather than :ref: here for better interop as well.)
bidict
======
*The bidirectional mapping library for Python.*
Status
------
.. image:: https://img.shields.io/pypi/v/bidict.svg
:target: https://pypi.org/project/bidict
:alt: Latest release
.. image:: https://img.shields.io/readthedocs/bidict/main.svg
:target: https://bidict.readthedocs.io/en/main/
:alt: Documentation
.. image:: https://github.com/jab/bidict/actions/workflows/test.yml/badge.svg
:target: https://github.com/jab/bidict/actions/workflows/test.yml?query=branch%3Amain
:alt: GitHub Actions CI status
.. image:: https://img.shields.io/pypi/l/bidict.svg
:target: https://raw.githubusercontent.com/jab/bidict/main/LICENSE
:alt: License
.. image:: https://static.pepy.tech/badge/bidict
:target: https://pepy.tech/project/bidict
:alt: PyPI Downloads
.. image:: https://img.shields.io/badge/GitHub-sponsor-ff69b4
:target: https://github.com/sponsors/jab
:alt: Sponsor
Features
--------
- Mature: Depended on by
Google, Venmo, CERN, Baidu, Tencent,
and teams across the world since 2009
- Familiar, Pythonic APIs
that are carefully designed for
safety, simplicity, flexibility, and ergonomics
- Lightweight, with no runtime dependencies
outside Python's standard library
- Implemented in
concise, well-factored, fully type-hinted Python code
that is optimized for running efficiently
as well as for long-term maintenance and stability
(as well as `joy <#learning-from-bidict>`__)
- Extensively `documented <https://bidict.readthedocs.io>`__
- 100% test coverage
running continuously across all supported Python versions
(including property-based tests and benchmarks)
Installation
------------
``pip install bidict``
Quick Start
-----------
.. code:: python
>>> from bidict import bidict
>>> element_by_symbol = bidict({'H': 'hydrogen'})
>>> element_by_symbol['H']
'hydrogen'
>>> element_by_symbol.inverse['hydrogen']
'H'
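A slightly extended sketch (ours, not from the upstream README), using only
APIs defined in the vendored sources below:

.. code:: python

>>> element_by_symbol['He'] = 'helium'  # the inverse stays in sync
>>> element_by_symbol.inverse['helium']
'He'
>>> element_by_symbol.forceput('Hg', 'helium')  # overwrite: ('He', 'helium') is dropped
>>> 'He' in element_by_symbol
False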
For more usage documentation,
head to the :doc:`intro` [#fn-intro]_
and proceed from there.
Enterprise Support
------------------
Enterprise-level support for bidict can be obtained via the
`Tidelift subscription <https://tidelift.com/subscription/pkg/pypi-bidict?utm_source=pypi-bidict&utm_medium=referral&utm_campaign=readme>`__
or by `contacting me directly <mailto:jabronson@gmail.com>`__.
I have a US-based LLC set up for invoicing,
and I have 15+ years of professional experience
delivering software and support to companies successfully.
You can also sponsor my work through several platforms, including GitHub Sponsors.
See the `Sponsoring <#sponsoring>`__ section below for details,
including rationale and examples of companies
supporting the open source projects they depend on.
Voluntary Community Support
---------------------------
Please search through already-asked questions and answers
in `GitHub Discussions <https://github.com/jab/bidict/discussions>`__
and the `issue tracker <https://github.com/jab/bidict/issues?q=is%3Aissue>`__
in case your question has already been addressed.
Otherwise, please feel free to
`start a new discussion <https://github.com/jab/bidict/discussions>`__
or `create a new issue <https://github.com/jab/bidict/issues/new>`__ on GitHub
for voluntary community support.
Notice of Usage
---------------
If you use bidict,
and especially if your usage or your organization is significant in some way,
please let me know in any of the following ways:
- `star bidict on GitHub <https://github.com/jab/bidict>`__
- post in `GitHub Discussions <https://github.com/jab/bidict/discussions>`__
- `email me <mailto:jabronson@gmail.com>`__
Changelog
---------
For bidict release notes, see the :doc:`changelog`. [#fn-changelog]_
Release Notifications
---------------------
.. duplicated in CHANGELOG.rst:
(Would use `.. include::` but GitHub's renderer doesn't support it.)
Watch `bidict releases on GitHub <https://github.com/jab/bidict/releases>`__
to be notified when new versions of bidict are published.
Click the "Watch" dropdown, choose "Custom", and then choose "Releases".
Learning from bidict
--------------------
One of the best things about bidict
is that it touches a surprising number of
interesting Python corners,
especially given its small size and scope.
Check out :doc:`learning-from-bidict` [#fn-learning]_
if you're interested in learning more.
Contributing
------------
I have been bidict's sole maintainer
and `active contributor <https://github.com/jab/bidict/graphs/contributors>`__
since I started the project ~15 years ago.
Your help would be most welcome!
See the :doc:`contributors-guide` [#fn-contributing]_
for more information.
Sponsoring
----------
.. duplicated in CONTRIBUTING.rst
(Would use `.. include::` but GitHub's renderer doesn't support it.)
.. image:: https://img.shields.io/badge/GitHub-sponsor-ff69b4
:target: https://github.com/sponsors/jab
:alt: Sponsor through GitHub
Bidict is the product of thousands of hours of my unpaid work
over the 15+ years that I've been the sole maintainer.
If bidict has helped you or your company accomplish your work,
please sponsor my work through one of the following,
and/or ask your company to do the same:
- `GitHub <https://github.com/sponsors/jab>`__
- `PayPal <https://www.paypal.com/cgi-bin/webscr?cmd=_xclick&business=jabronson%40gmail%2ecom&lc=US&item_name=Sponsor%20bidict>`__
- `Tidelift <https://tidelift.com>`__
- `thanks.dev <https://thanks.dev>`__
- `Gumroad <https://gumroad.com/l/bidict>`__
- `a support engagement with my LLC <#enterprise-support>`__
If you're not sure which to use, GitHub is an easy option,
especially if you already have a GitHub account.
Just choose a monthly or one-time amount, and GitHub handles everything else.
Your bidict sponsorship on GitHub will automatically go
on the same regular bill as any other GitHub charges you pay for.
PayPal is another easy option for one-time contributions.
See the following for rationale and examples of companies
supporting the open source projects they depend on
in this manner:
- `<https://engineering.atspotify.com/2022/04/announcing-the-spotify-foss-fund/>`__
- `<https://blog.sentry.io/2021/10/21/we-just-gave-154-999-dollars-and-89-cents-to-open-source-maintainers>`__
- `<https://engineering.indeedblog.com/blog/2019/07/foss-fund-six-months-in/>`__
.. - `<https://sethmlarson.dev/blog/people-in-your-software-supply-chain>`__
.. - `<https://www.cognitect.com/blog/supporting-open-source-developers>`__
.. - `<https://vorpus.org/blog/the-unreasonable-effectiveness-of-investment-in-open-source-infrastructure/>`__
Finding Documentation
---------------------
If you're viewing this on `<https://bidict.readthedocs.io>`__,
note that multiple versions of the documentation are available,
and you can choose a different version using the popup menu at the bottom-right.
Please make sure you're viewing the version of the documentation
that corresponds to the version of bidict you'd like to use.
If you're viewing this on GitHub, PyPI, or some other place
that can't render and link this documentation properly
and are seeing broken links,
try these alternate links instead:
.. [#fn-intro] `<https://bidict.readthedocs.io/intro.html>`__ | `<docs/intro.rst>`__
.. [#fn-changelog] `<https://bidict.readthedocs.io/changelog.html>`__ | `<CHANGELOG.rst>`__
.. [#fn-learning] `<https://bidict.readthedocs.io/learning-from-bidict.html>`__ | `<docs/learning-from-bidict.rst>`__
.. [#fn-contributing] `<https://bidict.readthedocs.io/contributors-guide.html>`__ | `<CONTRIBUTING.rst>`__

View File

@@ -0,0 +1,31 @@
bidict-0.23.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
bidict-0.23.1.dist-info/LICENSE,sha256=8_U63OyqSNc6ZuI4-lupBstBh2eDtF0ooTRrMULuvZo,16784
bidict-0.23.1.dist-info/METADATA,sha256=2ovIRm6Df8gdwAMekGqkeBSF5TWj2mv1jpmh4W4ks7o,8704
bidict-0.23.1.dist-info/RECORD,,
bidict-0.23.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
bidict-0.23.1.dist-info/top_level.txt,sha256=WuQO02jp0ODioS7sJoaHg3JJ5_3h6Sxo9RITvNGPYmc,7
bidict/__init__.py,sha256=pL87KsrDpBsl3AG09LQk1t1TSFt0hIJVYa2POMdErN8,4398
bidict/__pycache__/__init__.cpython-312.pyc,,
bidict/__pycache__/_abc.cpython-312.pyc,,
bidict/__pycache__/_base.cpython-312.pyc,,
bidict/__pycache__/_bidict.cpython-312.pyc,,
bidict/__pycache__/_dup.cpython-312.pyc,,
bidict/__pycache__/_exc.cpython-312.pyc,,
bidict/__pycache__/_frozen.cpython-312.pyc,,
bidict/__pycache__/_iter.cpython-312.pyc,,
bidict/__pycache__/_orderedbase.cpython-312.pyc,,
bidict/__pycache__/_orderedbidict.cpython-312.pyc,,
bidict/__pycache__/_typing.cpython-312.pyc,,
bidict/__pycache__/metadata.cpython-312.pyc,,
bidict/_abc.py,sha256=SMCNdCsmqSWg0OGnMZtnnXY8edjXcyZup5tva4HBm_c,3172
bidict/_base.py,sha256=YiauA0aj52fNB6cfZ4gBt6OV-CRQoZm7WVhuw1nT-Cg,24439
bidict/_bidict.py,sha256=Sr-RoEzWOaxpnDRbDJ7ngaGRIsyGnqZgzvR-NyT4jl4,6923
bidict/_dup.py,sha256=YAn5gWA6lwMBA5A6ebVF19UTZyambGS8WxmbK4TN1Ww,2079
bidict/_exc.py,sha256=HnD_WgteI5PrXa3zBx9RUiGlgnZTO6CF4nIU9p3-njk,1066
bidict/_frozen.py,sha256=p4TaRHKeyTs0KmlpwSnZiTlN_CR4J97kAgBpNdZHQMs,1771
bidict/_iter.py,sha256=zVUx-hJ1M4YuJROoFWRjPKlcaFnyo1AAuRpOaKAFhOQ,1530
bidict/_orderedbase.py,sha256=M7v5rHa7vrym9Z3DxQBFQDxjnrr39Z8p26V0c1PggoE,8942
bidict/_orderedbidict.py,sha256=pPnmC19mIISrj8_yjnb-4r_ti1B74tD5eTd08DETNuI,7080
bidict/_typing.py,sha256=AylMZpBhEFTQegfziPSxfKkKLk7oUsH6o3awDIg2z_k,1289
bidict/metadata.py,sha256=BMIKu6fBY_OKeV_q48EpumE7MdmFw8rFcdaUz8kcIYk,573
bidict/py.typed,sha256=RJao5SVFYIp8IfbxhL_SpZkBQYe3XXzPlobSRdh4B_c,16

View File

@@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.42.0)
Root-Is-Purelib: true
Tag: py3-none-any

View File

@@ -0,0 +1,103 @@
# Copyright 2009-2024 Joshua Bronson. All rights reserved.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# ============================================================================
# * Welcome to the bidict source code *
# ============================================================================
# Reading through the code? You'll find a "Code review nav" comment like the one
# below at the top and bottom of the key source files. Follow these cues to take
# a path through the code that's optimized for familiarizing yourself with it.
#
# If you're not reading this on https://github.com/jab/bidict already, go there
# to ensure you have the latest version of the code. While there, you can also
# star the project, watch it for updates, fork the code, and submit an issue or
# pull request with any proposed changes. More information can be found linked
# from README.rst, which is also shown on https://github.com/jab/bidict.
# * Code review nav *
# ============================================================================
# Current: __init__.py Next: _abc.py →
# ============================================================================
"""The bidirectional mapping library for Python.
----
bidict by example:
.. code-block:: python
>>> from bidict import bidict
>>> element_by_symbol = bidict({'H': 'hydrogen'})
>>> element_by_symbol['H']
'hydrogen'
>>> element_by_symbol.inverse['hydrogen']
'H'
Please see https://github.com/jab/bidict for the most up-to-date code and
https://bidict.readthedocs.io for the most up-to-date documentation
if you are reading this elsewhere.
----
.. :copyright: (c) 2009-2024 Joshua Bronson.
.. :license: MPLv2. See LICENSE for details.
"""
# Use private aliases to not re-export these publicly (for Sphinx automodule with imported-members).
from __future__ import annotations as _annotations
from contextlib import suppress as _suppress
from ._abc import BidirectionalMapping as BidirectionalMapping
from ._abc import MutableBidirectionalMapping as MutableBidirectionalMapping
from ._base import BidictBase as BidictBase
from ._base import BidictKeysView as BidictKeysView
from ._base import GeneratedBidictInverse as GeneratedBidictInverse
from ._bidict import MutableBidict as MutableBidict
from ._bidict import bidict as bidict
from ._dup import DROP_NEW as DROP_NEW
from ._dup import DROP_OLD as DROP_OLD
from ._dup import ON_DUP_DEFAULT as ON_DUP_DEFAULT
from ._dup import ON_DUP_DROP_OLD as ON_DUP_DROP_OLD
from ._dup import ON_DUP_RAISE as ON_DUP_RAISE
from ._dup import RAISE as RAISE
from ._dup import OnDup as OnDup
from ._dup import OnDupAction as OnDupAction
from ._exc import BidictException as BidictException
from ._exc import DuplicationError as DuplicationError
from ._exc import KeyAndValueDuplicationError as KeyAndValueDuplicationError
from ._exc import KeyDuplicationError as KeyDuplicationError
from ._exc import ValueDuplicationError as ValueDuplicationError
from ._frozen import frozenbidict as frozenbidict
from ._iter import inverted as inverted
from ._orderedbase import OrderedBidictBase as OrderedBidictBase
from ._orderedbidict import OrderedBidict as OrderedBidict
from .metadata import __author__ as __author__
from .metadata import __copyright__ as __copyright__
from .metadata import __description__ as __description__
from .metadata import __license__ as __license__
from .metadata import __url__ as __url__
from .metadata import __version__ as __version__
# Set __module__ of re-exported classes to the 'bidict' top-level module, so that e.g.
# 'bidict.bidict' shows up as 'bidict.bidict' rather than 'bidict._bidict.bidict'.
for _obj in tuple(locals().values()): # pragma: no cover
if not getattr(_obj, '__module__', '').startswith('bidict.'):
continue
with _suppress(AttributeError):
_obj.__module__ = 'bidict'
# * Code review nav *
# ============================================================================
# Current: __init__.py Next: _abc.py →
# ============================================================================
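# A quick check (ours, not part of the package) that the loop above rewrote
# the __module__ of the re-exports:
import bidict

assert bidict.bidict.__module__ == 'bidict'         # not 'bidict._bidict'
assert bidict.OrderedBidict.__module__ == 'bidict'  # not 'bidict._orderedbidict'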

View File

@@ -0,0 +1,79 @@
# Copyright 2009-2024 Joshua Bronson. All rights reserved.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# * Code review nav *
# (see comments in __init__.py)
# ============================================================================
# ← Prev: __init__.py Current: _abc.py Next: _base.py →
# ============================================================================
"""Provide the :class:`BidirectionalMapping` abstract base class."""
from __future__ import annotations
import typing as t
from abc import abstractmethod
from ._typing import KT
from ._typing import VT
class BidirectionalMapping(t.Mapping[KT, VT]):
"""Abstract base class for bidirectional mapping types.
Extends :class:`collections.abc.Mapping` primarily by adding the
(abstract) :attr:`inverse` property,
which implementers of :class:`BidirectionalMapping`
should override to return a reference to the inverse
:class:`BidirectionalMapping` instance.
"""
__slots__ = ()
@property
@abstractmethod
def inverse(self) -> BidirectionalMapping[VT, KT]:
"""The inverse of this bidirectional mapping instance.
*See also* :attr:`bidict.BidictBase.inverse`, :attr:`bidict.BidictBase.inv`
:raises NotImplementedError: Meant to be overridden in subclasses.
"""
# The @abstractmethod decorator prevents subclasses from being instantiated unless they
# override this method. But an overriding implementation may merely return super().inverse,
# in which case this implementation is used. Raise NotImplementedError to indicate that
# subclasses must actually provide their own implementation.
raise NotImplementedError
def __inverted__(self) -> t.Iterator[tuple[VT, KT]]:
"""Get an iterator over the items in :attr:`inverse`.
This is functionally equivalent to iterating over the items in the
forward mapping and inverting each one on the fly, but this provides a
more efficient implementation: Assuming the already-inverted items
are stored in :attr:`inverse`, just return an iterator over them directly.
Providing this default implementation enables external functions,
particularly :func:`~bidict.inverted`, to use this optimized
implementation when available, instead of having to invert on the fly.
*See also* :func:`bidict.inverted`
"""
return iter(self.inverse.items())
class MutableBidirectionalMapping(BidirectionalMapping[KT, VT], t.MutableMapping[KT, VT]):
"""Abstract base class for mutable bidirectional mapping types."""
__slots__ = ()
# * Code review nav *
# ============================================================================
# ← Prev: __init__.py Current: _abc.py Next: _base.py →
# ============================================================================
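# A minimal sketch (ours, hypothetical) of a concrete BidirectionalMapping:
# two plain dicts back the forward and inverse directions, and
# collections.abc.Mapping supplies the rest once __getitem__, __iter__,
# and __len__ exist.
from __future__ import annotations

import typing as t

from bidict import BidirectionalMapping

KT = t.TypeVar('KT')
VT = t.TypeVar('VT')

class PairMap(BidirectionalMapping[KT, VT]):
    def __init__(self, fwd: dict[KT, VT], inv: dict[VT, KT] | None = None) -> None:
        self._fwd = fwd
        self._inv = {v: k for (k, v) in fwd.items()} if inv is None else inv

    def __getitem__(self, key: KT) -> VT:
        return self._fwd[key]

    def __iter__(self) -> t.Iterator[KT]:
        return iter(self._fwd)

    def __len__(self) -> int:
        return len(self._fwd)

    @property
    def inverse(self) -> PairMap[VT, KT]:
        # Swap the shared backing dicts so both directions stay consistent.
        return PairMap(self._inv, self._fwd)

pm = PairMap({'H': 'hydrogen'})
assert pm.inverse['hydrogen'] == 'H'
assert list(pm.__inverted__()) == [('hydrogen', 'H')]  # default impl from the ABC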

View File

@@ -0,0 +1,556 @@
# Copyright 2009-2024 Joshua Bronson. All rights reserved.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# * Code review nav *
# (see comments in __init__.py)
# ============================================================================
# ← Prev: _abc.py Current: _base.py Next: _frozen.py →
# ============================================================================
"""Provide :class:`BidictBase`."""
from __future__ import annotations
import typing as t
import weakref
from itertools import starmap
from operator import eq
from types import MappingProxyType
from ._abc import BidirectionalMapping
from ._dup import DROP_NEW
from ._dup import DROP_OLD
from ._dup import ON_DUP_DEFAULT
from ._dup import RAISE
from ._dup import OnDup
from ._exc import DuplicationError
from ._exc import KeyAndValueDuplicationError
from ._exc import KeyDuplicationError
from ._exc import ValueDuplicationError
from ._iter import inverted
from ._iter import iteritems
from ._typing import KT
from ._typing import MISSING
from ._typing import OKT
from ._typing import OVT
from ._typing import VT
from ._typing import Maplike
from ._typing import MapOrItems
OldKV = t.Tuple[OKT[KT], OVT[VT]]
DedupResult = t.Optional[OldKV[KT, VT]]
Unwrites = t.List[t.Tuple[t.Any, ...]]
BT = t.TypeVar('BT', bound='BidictBase[t.Any, t.Any]')
class BidictKeysView(t.KeysView[KT], t.ValuesView[KT]):
"""Since the keys of a bidict are the values of its inverse (and vice versa),
the :class:`~collections.abc.ValuesView` result of calling *bi.values()*
is also a :class:`~collections.abc.KeysView` of *bi.inverse*.
"""
class BidictBase(BidirectionalMapping[KT, VT]):
"""Base class implementing :class:`BidirectionalMapping`."""
#: The default :class:`~bidict.OnDup`
#: that governs behavior when a provided item
#: duplicates the key or value of other item(s).
#:
#: *See also*
#: :ref:`basic-usage:Values Must Be Unique` (https://bidict.rtfd.io/basic-usage.html#values-must-be-unique),
#: :doc:`extending` (https://bidict.rtfd.io/extending.html)
on_dup = ON_DUP_DEFAULT
_fwdm: t.MutableMapping[KT, VT] #: the backing forward mapping (*key* → *val*)
_invm: t.MutableMapping[VT, KT] #: the backing inverse mapping (*val* → *key*)
# Use Any rather than KT/VT in the following to avoid "ClassVar cannot contain type variables" errors:
_fwdm_cls: t.ClassVar[type[t.MutableMapping[t.Any, t.Any]]] = dict #: class of the backing forward mapping
_invm_cls: t.ClassVar[type[t.MutableMapping[t.Any, t.Any]]] = dict #: class of the backing inverse mapping
#: The class of the inverse bidict instance.
_inv_cls: t.ClassVar[type[BidictBase[t.Any, t.Any]]]
def __init_subclass__(cls) -> None:
super().__init_subclass__()
cls._init_class()
@classmethod
def _init_class(cls) -> None:
cls._ensure_inv_cls()
cls._set_reversed()
__reversed__: t.ClassVar[t.Any]
@classmethod
def _set_reversed(cls) -> None:
"""Set __reversed__ for subclasses that do not set it explicitly
according to whether backing mappings are reversible.
"""
if cls is not BidictBase:
resolved = cls.__reversed__
overridden = resolved is not BidictBase.__reversed__
if overridden: # E.g. OrderedBidictBase, OrderedBidict
return
backing_reversible = all(issubclass(i, t.Reversible) for i in (cls._fwdm_cls, cls._invm_cls))
cls.__reversed__ = _fwdm_reversed if backing_reversible else None
@classmethod
def _ensure_inv_cls(cls) -> None:
"""Ensure :attr:`_inv_cls` is set, computing it dynamically if necessary.
All subclasses provided in :mod:`bidict` are their own inverse classes,
i.e., their backing forward and inverse mappings are both the same type,
but users may define subclasses where this is not the case.
This method ensures that the inverse class is computed correctly regardless.
See: :ref:`extending:Dynamic Inverse Class Generation`
(https://bidict.rtfd.io/extending.html#dynamic-inverse-class-generation)
"""
# This _ensure_inv_cls() method is (indirectly) corecursive with _make_inv_cls() below
# in the case that we need to dynamically generate the inverse class:
# 1. _ensure_inv_cls() calls cls._make_inv_cls()
# 2. cls._make_inv_cls() calls type(..., (cls, ...), ...) to dynamically generate inv_cls
# 3. Our __init_subclass__ hook (see above) is automatically called on inv_cls
# 4. inv_cls.__init_subclass__() calls inv_cls._ensure_inv_cls()
# 5. inv_cls._ensure_inv_cls() resolves to this implementation
# (inv_cls deliberately does not override this), so we're back where we started.
# But since the _make_inv_cls() call will have set inv_cls.__dict__._inv_cls,
# just check if it's already set before calling _make_inv_cls() to prevent infinite recursion.
if getattr(cls, '__dict__', {}).get('_inv_cls'): # Don't assume cls.__dict__ (e.g. mypyc native class)
return
cls._inv_cls = cls._make_inv_cls()
@classmethod
def _make_inv_cls(cls: type[BT]) -> type[BT]:
diff = cls._inv_cls_dict_diff()
cls_is_own_inv = all(getattr(cls, k, MISSING) == v for (k, v) in diff.items())
if cls_is_own_inv:
return cls
# Suppress auto-calculation of _inv_cls's _inv_cls since we know it already.
# Works with the guard in BidictBase._ensure_inv_cls() to prevent infinite recursion.
diff['_inv_cls'] = cls
inv_cls = type(f'{cls.__name__}Inv', (cls, GeneratedBidictInverse), diff)
inv_cls.__module__ = cls.__module__
return t.cast(t.Type[BT], inv_cls)
@classmethod
def _inv_cls_dict_diff(cls) -> dict[str, t.Any]:
return {
'_fwdm_cls': cls._invm_cls,
'_invm_cls': cls._fwdm_cls,
}
def __init__(self, arg: MapOrItems[KT, VT] = (), /, **kw: VT) -> None:
"""Make a new bidirectional mapping.
The signature behaves like that of :class:`dict`.
Items passed via positional arg are processed first,
followed by any items passed via keyword argument.
Any duplication encountered along the way
is handled as per :attr:`on_dup`.
"""
self._fwdm = self._fwdm_cls()
self._invm = self._invm_cls()
self._update(arg, kw, rollback=False)
# If Python ever adds support for higher-kinded types, `inverse` could use them, e.g.
# def inverse(self: BT[KT, VT]) -> BT[VT, KT]:
# Ref: https://github.com/python/typing/issues/548#issuecomment-621571821
@property
def inverse(self) -> BidictBase[VT, KT]:
"""The inverse of this bidirectional mapping instance."""
# When `bi.inverse` is called for the first time, this method
# computes the inverse instance, stores it for subsequent use, and then
# returns it. It also stores a reference on `bi.inverse` back to `bi`,
# but uses a weakref to avoid creating a reference cycle. Strong references
# to inverse instances are stored in ._inv, and weak references are stored
# in ._invweak.
# First check if a strong reference is already stored.
inv: BidictBase[VT, KT] | None = getattr(self, '_inv', None)
if inv is not None:
return inv
# Next check if a weak reference is already stored.
invweak = getattr(self, '_invweak', None)
if invweak is not None:
inv = invweak() # Try to resolve a strong reference and return it.
if inv is not None:
return inv
# No luck. Compute the inverse reference and store it for subsequent use.
inv = self._make_inverse()
self._inv: BidictBase[VT, KT] | None = inv
self._invweak: weakref.ReferenceType[BidictBase[VT, KT]] | None = None
# Also store a weak reference back to `instance` on its inverse instance, so that
# the second `.inverse` access in `bi.inverse.inverse` hits the cached weakref.
inv._inv = None
inv._invweak = weakref.ref(self)
# In e.g. `bidict().inverse.inverse`, this design ensures that a strong reference
# back to the original instance is retained before its refcount drops to zero,
# avoiding an unintended potential deallocation.
return inv
def _make_inverse(self) -> BidictBase[VT, KT]:
inv: BidictBase[VT, KT] = self._inv_cls()
inv._fwdm = self._invm
inv._invm = self._fwdm
return inv
@property
def inv(self) -> BidictBase[VT, KT]:
"""Alias for :attr:`inverse`."""
return self.inverse
def __repr__(self) -> str:
"""See :func:`repr`."""
clsname = self.__class__.__name__
items = dict(self.items()) if self else ''
return f'{clsname}({items})'
def values(self) -> BidictKeysView[VT]:
"""A set-like object providing a view on the contained values.
Since the values of a bidict are equivalent to the keys of its inverse,
this method returns a set-like object for this bidict's values
rather than just a collections.abc.ValuesView.
This object supports set operations like union and difference,
and constant- rather than linear-time containment checks,
and is no more expensive to provide than the less capable
collections.abc.ValuesView would be.
See :meth:`keys` for more information.
"""
return t.cast(BidictKeysView[VT], self.inverse.keys())
def keys(self) -> t.KeysView[KT]:
"""A set-like object providing a view on the contained keys.
When *b._fwdm* is a :class:`dict`, *b.keys()* returns a
*dict_keys* object that behaves exactly the same as
*collections.abc.KeysView(b)*, except for
- offering better performance
- being reversible on Python 3.8+
- having a .mapping attribute in Python 3.10+
that exposes a mappingproxy to *b._fwdm*.
"""
fwdm, fwdm_cls = self._fwdm, self._fwdm_cls
return fwdm.keys() if fwdm_cls is dict else BidictKeysView(self)
def items(self) -> t.ItemsView[KT, VT]:
"""A set-like object providing a view on the contained items.
When *b._fwdm* is a :class:`dict`, *b.items()* returns a
*dict_items* object that behaves exactly the same as
*collections.abc.ItemsView(b)*, except for:
- offering better performance
- being reversible on Python 3.8+
- having a .mapping attribute in Python 3.10+
that exposes a mappingproxy to *b._fwdm*.
"""
return self._fwdm.items() if self._fwdm_cls is dict else super().items()
# The inherited collections.abc.Mapping.__contains__() method is implemented by doing a `try`
# `except KeyError` around `self[key]`. The following implementation is much faster,
# especially in the missing case.
def __contains__(self, key: t.Any) -> bool:
"""True if the mapping contains the specified key, else False."""
return key in self._fwdm
# The inherited collections.abc.Mapping.__eq__() method is implemented in terms of an inefficient
# `dict(self.items()) == dict(other.items())` comparison, so override it with a
# more efficient implementation.
def __eq__(self, other: object) -> bool:
"""*x.__eq__(other)  x == other*
Equivalent to *dict(x.items()) == dict(other.items())*
but more efficient.
Note that :meth:`bidict's __eq__() <bidict.BidictBase.__eq__>` implementation
is inherited by subclasses,
in particular by the ordered bidict subclasses,
so even with ordered bidicts,
:ref:`== comparison is order-insensitive <eq-order-insensitive>`
(https://bidict.rtfd.io/other-bidict-types.html#eq-is-order-insensitive).
*See also* :meth:`equals_order_sensitive`
"""
if isinstance(other, t.Mapping):
return self._fwdm.items() == other.items()
# Ref: https://docs.python.org/3/library/constants.html#NotImplemented
return NotImplemented
def equals_order_sensitive(self, other: object) -> bool:
"""Order-sensitive equality check.
*See also* :ref:`eq-order-insensitive`
(https://bidict.rtfd.io/other-bidict-types.html#eq-is-order-insensitive)
"""
if not isinstance(other, t.Mapping) or len(self) != len(other):
return False
return all(starmap(eq, zip(self.items(), other.items())))
def _dedup(self, key: KT, val: VT, on_dup: OnDup) -> DedupResult[KT, VT]:
"""Check *key* and *val* for any duplication in self.
Handle any duplication as per the passed in *on_dup*.
If (key, val) is already present, return None
since writing (key, val) would be a no-op.
If duplication is found and the corresponding :class:`~bidict.OnDupAction` is
:attr:`~bidict.DROP_NEW`, return None.
If duplication is found and the corresponding :class:`~bidict.OnDupAction` is
:attr:`~bidict.RAISE`, raise the appropriate exception.
If duplication is found and the corresponding :class:`~bidict.OnDupAction` is
:attr:`~bidict.DROP_OLD`, or if no duplication is found,
return *(oldkey, oldval)*.
"""
fwdm, invm = self._fwdm, self._invm
oldval: OVT[VT] = fwdm.get(key, MISSING)
oldkey: OKT[KT] = invm.get(val, MISSING)
isdupkey, isdupval = oldval is not MISSING, oldkey is not MISSING
if isdupkey and isdupval:
if key == oldkey:
assert val == oldval
# (key, val) duplicates an existing item -> no-op.
return None
# key and val each duplicate a different existing item.
if on_dup.val is RAISE:
raise KeyAndValueDuplicationError(key, val)
if on_dup.val is DROP_NEW:
return None
assert on_dup.val is DROP_OLD
# Fall through to the return statement on the last line.
elif isdupkey:
if on_dup.key is RAISE:
raise KeyDuplicationError(key)
if on_dup.key is DROP_NEW:
return None
assert on_dup.key is DROP_OLD
# Fall through to the return statement on the last line.
elif isdupval:
if on_dup.val is RAISE:
raise ValueDuplicationError(val)
if on_dup.val is DROP_NEW:
return None
assert on_dup.val is DROP_OLD
# Fall through to the return statement on the last line.
# else neither isdupkey nor isdupval.
return oldkey, oldval
def _write(self, newkey: KT, newval: VT, oldkey: OKT[KT], oldval: OVT[VT], unwrites: Unwrites | None) -> None:
"""Insert (newkey, newval), extending *unwrites* with associated inverse operations if provided.
*oldkey* and *oldval* are as returned by :meth:`_dedup`.
If *unwrites* is not None, it is extended with the inverse operations necessary to undo the write.
This design allows :meth:`_update` to roll back a partially applied update that fails part-way through
when necessary.
This design also allows subclasses that require additional operations to easily extend this implementation.
For example, :class:`bidict.OrderedBidictBase` calls this inherited implementation, and then extends *unwrites*
with additional operations needed to keep its internal linked list nodes consistent with its items' order
as changes are made.
"""
fwdm, invm = self._fwdm, self._invm
fwdm_set, invm_set = fwdm.__setitem__, invm.__setitem__
fwdm_del, invm_del = fwdm.__delitem__, invm.__delitem__
# Always perform the following writes regardless of duplication.
fwdm_set(newkey, newval)
invm_set(newval, newkey)
if oldval is MISSING and oldkey is MISSING: # no key or value duplication
# {0: 1, 2: 3} | {4: 5} => {0: 1, 2: 3, 4: 5}
if unwrites is not None:
unwrites.extend((
(fwdm_del, newkey),
(invm_del, newval),
))
elif oldval is not MISSING and oldkey is not MISSING: # key and value duplication across two different items
# {0: 1, 2: 3} | {0: 3} => {0: 3}
fwdm_del(oldkey)
invm_del(oldval)
if unwrites is not None:
unwrites.extend((
(fwdm_set, newkey, oldval),
(invm_set, oldval, newkey),
(fwdm_set, oldkey, newval),
(invm_set, newval, oldkey),
))
elif oldval is not MISSING: # just key duplication
# {0: 1, 2: 3} | {2: 4} => {0: 1, 2: 4}
invm_del(oldval)
if unwrites is not None:
unwrites.extend((
(fwdm_set, newkey, oldval),
(invm_set, oldval, newkey),
(invm_del, newval),
))
else:
assert oldkey is not MISSING # just value duplication
# {0: 1, 2: 3} | {4: 3} => {0: 1, 4: 3}
fwdm_del(oldkey)
if unwrites is not None:
unwrites.extend((
(fwdm_set, oldkey, newval),
(invm_set, newval, oldkey),
(fwdm_del, newkey),
))
def _update(
self,
arg: MapOrItems[KT, VT],
kw: t.Mapping[str, VT] = MappingProxyType({}),
*,
rollback: bool | None = None,
on_dup: OnDup | None = None,
) -> None:
"""Update with the items from *arg* and *kw*, maybe failing and rolling back as per *on_dup* and *rollback*."""
# Note: We must process input in a single pass, since arg may be a generator.
if not isinstance(arg, (t.Iterable, Maplike)):
raise TypeError(f"'{arg.__class__.__name__}' object is not iterable")
if not arg and not kw:
return
if on_dup is None:
on_dup = self.on_dup
if rollback is None:
rollback = RAISE in on_dup
# Fast path when we're empty and updating only from another bidict (i.e. no dup vals in new items).
if not self and not kw and isinstance(arg, BidictBase):
self._init_from(arg)
return
# Fast path when we're adding more items than we contain already and rollback is enabled:
# Update a copy of self with rollback disabled. Fail if that fails, otherwise become the copy.
if rollback and isinstance(arg, t.Sized) and len(arg) + len(kw) > len(self):
tmp = self.copy()
tmp._update(arg, kw, rollback=False, on_dup=on_dup)
self._init_from(tmp)
return
# In all other cases, benchmarking has indicated that the update is best implemented as follows:
# For each new item, perform a dup check (raising if necessary), and apply the associated writes we need to
# perform on our backing _fwdm and _invm mappings. If rollback is enabled, also compute the associated unwrites
# as we go. If the update results in a DuplicationError and rollback is enabled, apply the accumulated unwrites
# before raising, to ensure that we fail clean.
write = self._write
unwrites: Unwrites | None = [] if rollback else None
for key, val in iteritems(arg, **kw):
try:
dedup_result = self._dedup(key, val, on_dup)
except DuplicationError:
if unwrites is not None:
for fn, *args in reversed(unwrites):
fn(*args)
raise
if dedup_result is not None:
write(key, val, *dedup_result, unwrites=unwrites)
def __copy__(self: BT) -> BT:
"""Used for the copy protocol. See the :mod:`copy` module."""
return self.copy()
def copy(self: BT) -> BT:
"""Make a (shallow) copy of this bidict."""
# Could just `return self.__class__(self)` here, but the below is faster. The former
# would copy this bidict's items into a new instance one at a time (checking for duplication
# for each item), whereas the below copies from the backing mappings all at once, and foregoes
# item-by-item duplication checking since the backing mappings have been checked already.
return self._from_other(self.__class__, self)
@staticmethod
def _from_other(bt: type[BT], other: MapOrItems[KT, VT], inv: bool = False) -> BT:
"""Fast, private constructor based on :meth:`_init_from`.
If *inv* is true, return the inverse of the instance instead of the instance itself.
(Useful for pickling with dynamically-generated inverse classes -- see :meth:`__reduce__`.)
"""
inst = bt()
inst._init_from(other)
return t.cast(BT, inst.inverse) if inv else inst
def _init_from(self, other: MapOrItems[KT, VT]) -> None:
"""Fast init from *other*, bypassing item-by-item duplication checking."""
self._fwdm.clear()
self._invm.clear()
self._fwdm.update(other)
# If other is a bidict, use its existing backing inverse mapping, otherwise
# other could be a generator that's now exhausted, so invert self._fwdm on the fly.
inv = other.inverse if isinstance(other, BidictBase) else inverted(self._fwdm)
self._invm.update(inv)
# other's type is Mapping rather than Maplike since bidict() | SupportsKeysAndGetItem({})
# raises a TypeError, just like dict() | SupportsKeysAndGetItem({}) does.
def __or__(self: BT, other: t.Mapping[KT, VT]) -> BT:
"""Return self|other."""
if not isinstance(other, t.Mapping):
return NotImplemented
new = self.copy()
new._update(other, rollback=False)
return new
def __ror__(self: BT, other: t.Mapping[KT, VT]) -> BT:
"""Return other|self."""
if not isinstance(other, t.Mapping):
return NotImplemented
new = self.__class__(other)
new._update(self, rollback=False)
return new
def __len__(self) -> int:
"""The number of contained items."""
return len(self._fwdm)
def __iter__(self) -> t.Iterator[KT]:
"""Iterator over the contained keys."""
return iter(self._fwdm)
def __getitem__(self, key: KT) -> VT:
"""*x.__getitem__(key) ⟺ x[key]*"""
return self._fwdm[key]
def __reduce__(self) -> tuple[t.Any, ...]:
"""Return state information for pickling."""
cls = self.__class__
inst: t.Mapping[t.Any, t.Any] = self
# If this bidict's class is dynamically generated, pickle the inverse instead, whose (presumably not
# dynamically generated) class the caller is more likely to have a reference to somewhere in sys.modules
# that pickle can discover.
if should_invert := isinstance(self, GeneratedBidictInverse):
cls = self._inv_cls
inst = self.inverse
return self._from_other, (cls, dict(inst), should_invert)
# See BidictBase._set_reversed() above.
def _fwdm_reversed(self: BidictBase[KT, t.Any]) -> t.Iterator[KT]:
"""Iterator over the contained keys in reverse order."""
assert isinstance(self._fwdm, t.Reversible)
return reversed(self._fwdm)
BidictBase._init_class()
class GeneratedBidictInverse:
"""Base class for dynamically-generated inverse bidict classes."""
# * Code review nav *
# ============================================================================
# ← Prev: _abc.py Current: _base.py Next: _frozen.py →
# ============================================================================
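# A usage sketch (ours) of the view and equality semantics documented above:
from bidict import OrderedBidict, bidict

b = bidict({'H': 'hydrogen', 'He': 'helium'})
assert b.values() & {'helium', 'neon'} == {'helium'}  # values() supports set ops

a = OrderedBidict([('H', 'hydrogen'), ('He', 'helium')])
c = OrderedBidict([('He', 'helium'), ('H', 'hydrogen')])
assert a == c                           # == is order-insensitive, like dict
assert not a.equals_order_sensitive(c)  # the order-sensitive check differs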

View File

@@ -0,0 +1,194 @@
# Copyright 2009-2024 Joshua Bronson. All rights reserved.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# * Code review nav *
# (see comments in __init__.py)
# ============================================================================
# ← Prev: _frozen.py Current: _bidict.py Next: _orderedbase.py →
# ============================================================================
"""Provide :class:`MutableBidict` and :class:`bidict`."""
from __future__ import annotations
import typing as t
from ._abc import MutableBidirectionalMapping
from ._base import BidictBase
from ._dup import ON_DUP_DROP_OLD
from ._dup import ON_DUP_RAISE
from ._dup import OnDup
from ._typing import DT
from ._typing import KT
from ._typing import MISSING
from ._typing import ODT
from ._typing import VT
from ._typing import MapOrItems
class MutableBidict(BidictBase[KT, VT], MutableBidirectionalMapping[KT, VT]):
"""Base class for mutable bidirectional mappings."""
if t.TYPE_CHECKING:
@property
def inverse(self) -> MutableBidict[VT, KT]: ...
@property
def inv(self) -> MutableBidict[VT, KT]: ...
def _pop(self, key: KT) -> VT:
val = self._fwdm.pop(key)
del self._invm[val]
return val
def __delitem__(self, key: KT) -> None:
"""*x.__delitem__(y)  del x[y]*"""
self._pop(key)
def __setitem__(self, key: KT, val: VT) -> None:
"""Set the value for *key* to *val*.
If *key* is already associated with *val*, this is a no-op.
If *key* is already associated with a different value,
the old value will be replaced with *val*,
as with dict's :meth:`__setitem__`.
If *val* is already associated with a different key,
an exception is raised
to protect against accidental removal of the key
that's currently associated with *val*.
Use :meth:`put` instead if you want to specify different behavior in
the case that the provided key or value duplicates an existing one.
Or use :meth:`forceput` to unconditionally associate *key* with *val*,
replacing any existing items as necessary to preserve uniqueness.
:raises bidict.ValueDuplicationError: if *val* duplicates that of an
existing item.
:raises bidict.KeyAndValueDuplicationError: if *key* duplicates the key of an
existing item and *val* duplicates the value of a different
existing item.
"""
self.put(key, val, on_dup=self.on_dup)
def put(self, key: KT, val: VT, on_dup: OnDup = ON_DUP_RAISE) -> None:
"""Associate *key* with *val*, honoring the :class:`OnDup` given in *on_dup*.
For example, if *on_dup* is :attr:`~bidict.ON_DUP_RAISE`,
then *key* will be associated with *val* if and only if
*key* is not already associated with an existing value and
*val* is not already associated with an existing key,
otherwise an exception will be raised.
If *key* is already associated with *val*, this is a no-op.
:raises bidict.KeyDuplicationError: if attempting to insert an item
whose key only duplicates an existing item's, and *on_dup.key* is
:attr:`~bidict.RAISE`.
:raises bidict.ValueDuplicationError: if attempting to insert an item
whose value only duplicates an existing item's, and *on_dup.val* is
:attr:`~bidict.RAISE`.
:raises bidict.KeyAndValueDuplicationError: if attempting to insert an
item whose key duplicates one existing item's, and whose value
duplicates another existing item's, and *on_dup.val* is
:attr:`~bidict.RAISE`.
"""
self._update(((key, val),), on_dup=on_dup)
def forceput(self, key: KT, val: VT) -> None:
"""Associate *key* with *val* unconditionally.
Replace any existing mappings containing key *key* or value *val*
as necessary to preserve uniqueness.
"""
self.put(key, val, on_dup=ON_DUP_DROP_OLD)
def clear(self) -> None:
"""Remove all items."""
self._fwdm.clear()
self._invm.clear()
@t.overload
def pop(self, key: KT, /) -> VT: ...
@t.overload
def pop(self, key: KT, default: DT = ..., /) -> VT | DT: ...
def pop(self, key: KT, default: ODT[DT] = MISSING, /) -> VT | DT:
"""*x.pop(k[, d]) → v*
Remove specified key and return the corresponding value.
:raises KeyError: if *key* is not found and no *default* is provided.
"""
try:
return self._pop(key)
except KeyError:
if default is MISSING:
raise
return default
def popitem(self) -> tuple[KT, VT]:
"""*x.popitem() → (k, v)*
Remove and return some item as a (key, value) pair.
:raises KeyError: if *x* is empty.
"""
key, val = self._fwdm.popitem()
del self._invm[val]
return key, val
def update(self, arg: MapOrItems[KT, VT] = (), /, **kw: VT) -> None:
"""Like calling :meth:`putall` with *self.on_dup* passed for *on_dup*."""
self._update(arg, kw=kw)
def forceupdate(self, arg: MapOrItems[KT, VT] = (), /, **kw: VT) -> None:
"""Like a bulk :meth:`forceput`."""
self._update(arg, kw=kw, on_dup=ON_DUP_DROP_OLD)
def putall(self, items: MapOrItems[KT, VT], on_dup: OnDup = ON_DUP_RAISE) -> None:
"""Like a bulk :meth:`put`.
If one of the given items causes an exception to be raised,
none of the items is inserted.
"""
self._update(items, on_dup=on_dup)
# other's type is Mapping rather than Maplike since bidict() |= SupportsKeysAndGetItem({})
# raises a TypeError, just like dict() |= SupportsKeysAndGetItem({}) does.
def __ior__(self, other: t.Mapping[KT, VT]) -> MutableBidict[KT, VT]:
"""Return self|=other."""
self.update(other)
return self
class bidict(MutableBidict[KT, VT]):
"""The main bidirectional mapping type.
See :ref:`intro:Introduction` and :ref:`basic-usage:Basic Usage`
to get started (also available at https://bidict.rtfd.io).
"""
if t.TYPE_CHECKING:
@property
def inverse(self) -> bidict[VT, KT]: ...
@property
def inv(self) -> bidict[VT, KT]: ...
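# A minimal usage sketch of the mutable API above (illustrative only; the
# element names are made up for the demo):
if __name__ == '__main__':
    b = bidict({'H': 'hydrogen'})
    b['He'] = 'helium'            # __setitem__ -> put(..., on_dup=self.on_dup)
    b.forceput('H', 'helium')     # drops both conflicting items to stay one-to-one
    assert dict(b) == {'H': 'helium'}
    assert b.pop('H') == 'helium' and b.pop('H', None) is None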
# * Code review nav *
# ============================================================================
# ← Prev: _frozen.py Current: _bidict.py Next: _orderedbase.py →
# ============================================================================

View File

@ -0,0 +1,61 @@
# Copyright 2009-2024 Joshua Bronson. All rights reserved.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""Provide :class:`OnDup` and related functionality."""
from __future__ import annotations
import typing as t
from enum import Enum
class OnDupAction(Enum):
"""An action to take to prevent duplication from occurring."""
#: Raise a :class:`~bidict.DuplicationError`.
RAISE = 'RAISE'
#: Overwrite existing items with new items.
DROP_OLD = 'DROP_OLD'
#: Keep existing items and drop new items.
DROP_NEW = 'DROP_NEW'
def __repr__(self) -> str:
return f'{self.__class__.__name__}.{self.name}'
RAISE: t.Final[OnDupAction] = OnDupAction.RAISE
DROP_OLD: t.Final[OnDupAction] = OnDupAction.DROP_OLD
DROP_NEW: t.Final[OnDupAction] = OnDupAction.DROP_NEW
class OnDup(t.NamedTuple):
r"""A combination of :class:`~bidict.OnDupAction`\s specifying how to handle various types of duplication.
The :attr:`~OnDup.key` field specifies what action to take when a duplicate key is encountered.
The :attr:`~OnDup.val` field specifies what action to take when a duplicate value is encountered.
In the case of both key and value duplication across two different items,
only :attr:`~OnDup.val` is used.
*See also* :ref:`basic-usage:Values Must Be Unique`
(https://bidict.rtfd.io/basic-usage.html#values-must-be-unique)
"""
key: OnDupAction = DROP_OLD
val: OnDupAction = RAISE
#: Default :class:`OnDup` used for the
#: :meth:`~bidict.bidict.__init__`,
#: :meth:`~bidict.bidict.__setitem__`, and
#: :meth:`~bidict.bidict.update` methods.
ON_DUP_DEFAULT: t.Final[OnDup] = OnDup(key=DROP_OLD, val=RAISE)
#: An :class:`OnDup` whose members are all :obj:`RAISE`.
ON_DUP_RAISE: t.Final[OnDup] = OnDup(key=RAISE, val=RAISE)
#: An :class:`OnDup` whose members are all :obj:`DROP_OLD`.
ON_DUP_DROP_OLD: t.Final[OnDup] = OnDup(key=DROP_OLD, val=DROP_OLD)
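# A quick sketch of how these actions combine in practice (illustrative only;
# bidict is imported lazily here to avoid a circular import at module level):
if __name__ == '__main__':
    from bidict import bidict

    b = bidict({1: 'one'})
    b.put(1, 'uno', on_dup=OnDup(key=DROP_NEW, val=DROP_NEW))  # keep the existing item
    assert b[1] == 'one'
    b.put(1, 'uno', on_dup=ON_DUP_DROP_OLD)                    # overwrite, like forceput()
    assert b[1] == 'uno'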

View File

@ -0,0 +1,36 @@
# Copyright 2009-2024 Joshua Bronson. All rights reserved.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""Provide all bidict exceptions."""
from __future__ import annotations
class BidictException(Exception):
"""Base class for bidict exceptions."""
class DuplicationError(BidictException):
"""Base class for exceptions raised when uniqueness is violated
as per the :attr:`~bidict.RAISE` :class:`~bidict.OnDupAction`.
"""
class KeyDuplicationError(DuplicationError):
"""Raised when a given key is not unique."""
class ValueDuplicationError(DuplicationError):
"""Raised when a given value is not unique."""
class KeyAndValueDuplicationError(KeyDuplicationError, ValueDuplicationError):
"""Raised when a given item's key and value are not unique.
That is, its key duplicates that of another item,
and its value duplicates that of a different other item.
"""

View File

@ -0,0 +1,50 @@
# Copyright 2009-2024 Joshua Bronson. All rights reserved.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# * Code review nav *
# (see comments in __init__.py)
# ============================================================================
# ← Prev: _base.py Current: _frozen.py Next: _bidict.py →
# ============================================================================
"""Provide :class:`frozenbidict`, an immutable, hashable bidirectional mapping type."""
from __future__ import annotations
import typing as t
from ._base import BidictBase
from ._typing import KT
from ._typing import VT
class frozenbidict(BidictBase[KT, VT]):
"""Immutable, hashable bidict type."""
_hash: int
if t.TYPE_CHECKING:
@property
def inverse(self) -> frozenbidict[VT, KT]: ...
@property
def inv(self) -> frozenbidict[VT, KT]: ...
def __hash__(self) -> int:
"""The hash of this bidict as determined by its items."""
if getattr(self, '_hash', None) is None:
# The following is like hash(frozenset(self.items()))
# but more memory efficient. See also: https://bugs.python.org/issue46684
self._hash = t.ItemsView(self)._hash()
return self._hash
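# A quick sketch of the hashability this enables (illustrative only):
if __name__ == '__main__':
    f = frozenbidict({'H': 'hydrogen'})
    assert hash(f) == hash(frozenbidict({'H': 'hydrogen'}))  # equal items, equal hash
    assert {f: 'elements'}[frozenbidict({'H': 'hydrogen'})] == 'elements'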
# * Code review nav *
# ============================================================================
# ← Prev: _base.py Current: _frozen.py Next: _bidict.py →
# ============================================================================

View File

@ -0,0 +1,51 @@
# Copyright 2009-2024 Joshua Bronson. All rights reserved.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""Functions for iterating over items in a mapping."""
from __future__ import annotations
import typing as t
from operator import itemgetter
from ._typing import KT
from ._typing import VT
from ._typing import ItemsIter
from ._typing import Maplike
from ._typing import MapOrItems
def iteritems(arg: MapOrItems[KT, VT] = (), /, **kw: VT) -> ItemsIter[KT, VT]:
"""Yield the items from *arg* and *kw* in the order given."""
if isinstance(arg, t.Mapping):
yield from arg.items()
elif isinstance(arg, Maplike):
yield from ((k, arg[k]) for k in arg.keys())
else:
yield from arg
yield from t.cast(ItemsIter[KT, VT], kw.items())
swap: t.Final = itemgetter(1, 0)
def inverted(arg: MapOrItems[KT, VT]) -> ItemsIter[VT, KT]:
"""Yield the inverse items of the provided object.
If *arg* has a :func:`callable` ``__inverted__`` attribute,
return the result of calling it.
Otherwise, return an iterator over the items in `arg`,
inverting each item on the fly.
*See also* :attr:`bidict.BidirectionalMapping.__inverted__`
"""
invattr = getattr(arg, '__inverted__', None)
if callable(invattr):
inv: ItemsIter[VT, KT] = invattr()
return inv
return map(swap, iteritems(arg))
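# A quick sketch of the two helpers above (illustrative only):
if __name__ == '__main__':
    assert list(iteritems({'a': 1}, b=2)) == [('a', 1), ('b', 2)]
    assert list(inverted([('a', 1), ('b', 2)])) == [(1, 'a'), (2, 'b')]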

View File

@ -0,0 +1,238 @@
# Copyright 2009-2024 Joshua Bronson. All rights reserved.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# * Code review nav *
# (see comments in __init__.py)
# ============================================================================
# ← Prev: _bidict.py Current: _orderedbase.py Next: _orderedbidict.py →
# ============================================================================
"""Provide :class:`OrderedBidictBase`."""
from __future__ import annotations
import typing as t
from weakref import ref as weakref
from ._base import BidictBase
from ._base import Unwrites
from ._bidict import bidict
from ._iter import iteritems
from ._typing import KT
from ._typing import MISSING
from ._typing import OKT
from ._typing import OVT
from ._typing import VT
from ._typing import MapOrItems
AT = t.TypeVar('AT') # attr type
class WeakAttr(t.Generic[AT]):
"""Descriptor to automatically manage (de)referencing the given slot as a weakref.
See https://docs.python.org/3/howto/descriptor.html#managed-attributes
for an intro to using descriptors like this for managed attributes.
"""
def __init__(self, *, slot: str) -> None:
self.slot = slot
def __set__(self, instance: t.Any, value: AT) -> None:
setattr(instance, self.slot, weakref(value))
def __get__(self, instance: t.Any, __owner: t.Any = None) -> AT:
return t.cast(AT, getattr(instance, self.slot)())
class Node:
"""A node in a circular doubly-linked list
used to encode the order of items in an ordered bidict.
A weak reference to the previous node is stored
to avoid creating strong reference cycles.
Referencing/dereferencing the weakref is handled automatically by :class:`WeakAttr`.
"""
prv: WeakAttr[Node] = WeakAttr(slot='_prv_weak')
__slots__ = ('__weakref__', '_prv_weak', 'nxt')
nxt: Node | WeakAttr[Node] # Allow subclasses to use a WeakAttr for nxt too (see SentinelNode)
def __init__(self, prv: Node, nxt: Node) -> None:
self.prv = prv
self.nxt = nxt
def unlink(self) -> None:
"""Remove self from in between prv and nxt.
Self's references to prv and nxt are retained so it can be relinked (see below).
"""
self.prv.nxt = self.nxt
self.nxt.prv = self.prv
def relink(self) -> None:
"""Restore self between prv and nxt after unlinking (see above)."""
self.prv.nxt = self.nxt.prv = self
class SentinelNode(Node):
"""Special node in a circular doubly-linked list
that links the first node with the last node.
When its next and previous references point back to itself,
it represents an empty list.
"""
nxt: WeakAttr[Node] = WeakAttr(slot='_nxt_weak')
__slots__ = ('_nxt_weak',)
def __init__(self) -> None:
super().__init__(self, self)
def iternodes(self, *, reverse: bool = False) -> t.Iterator[Node]:
"""Iterator yielding nodes in the requested order."""
attr = 'prv' if reverse else 'nxt'
node = getattr(self, attr)
while node is not self:
yield node
node = getattr(node, attr)
def new_last_node(self) -> Node:
"""Create and return a new terminal node."""
old_last = self.prv
new_last = Node(old_last, self)
old_last.nxt = self.prv = new_last
return new_last
class OrderedBidictBase(BidictBase[KT, VT]):
"""Base class implementing an ordered :class:`BidirectionalMapping`."""
_node_by_korv: bidict[t.Any, Node]
_bykey: bool
def __init__(self, arg: MapOrItems[KT, VT] = (), /, **kw: VT) -> None:
"""Make a new ordered bidirectional mapping.
The signature behaves like that of :class:`dict`.
Items passed in are added in the order they are passed,
respecting the :attr:`~bidict.BidictBase.on_dup`
class attribute in the process.
The order in which items are inserted is remembered,
similar to :class:`collections.OrderedDict`.
"""
self._sntl = SentinelNode()
self._node_by_korv = bidict()
self._bykey = True
super().__init__(arg, **kw)
if t.TYPE_CHECKING:
@property
def inverse(self) -> OrderedBidictBase[VT, KT]: ...
@property
def inv(self) -> OrderedBidictBase[VT, KT]: ...
def _make_inverse(self) -> OrderedBidictBase[VT, KT]:
inv = t.cast(OrderedBidictBase[VT, KT], super()._make_inverse())
inv._sntl = self._sntl
inv._node_by_korv = self._node_by_korv
inv._bykey = not self._bykey
return inv
def _assoc_node(self, node: Node, key: KT, val: VT) -> None:
korv = key if self._bykey else val
self._node_by_korv.forceput(korv, node)
def _dissoc_node(self, node: Node) -> None:
del self._node_by_korv.inverse[node]
node.unlink()
def _init_from(self, other: MapOrItems[KT, VT]) -> None:
"""See :meth:`BidictBase._init_from`."""
super()._init_from(other)
bykey = self._bykey
korv_by_node = self._node_by_korv.inverse
korv_by_node.clear()
korv_by_node_set = korv_by_node.__setitem__
self._sntl.nxt = self._sntl.prv = self._sntl
new_node = self._sntl.new_last_node
for k, v in iteritems(other):
korv_by_node_set(new_node(), k if bykey else v)
def _write(self, newkey: KT, newval: VT, oldkey: OKT[KT], oldval: OVT[VT], unwrites: Unwrites | None) -> None:
"""See :meth:`bidict.BidictBase._spec_write`."""
super()._write(newkey, newval, oldkey, oldval, unwrites)
assoc, dissoc = self._assoc_node, self._dissoc_node
node_by_korv, bykey = self._node_by_korv, self._bykey
if oldval is MISSING and oldkey is MISSING: # no key or value duplication
# {0: 1, 2: 3} | {4: 5} => {0: 1, 2: 3, 4: 5}
newnode = self._sntl.new_last_node()
assoc(newnode, newkey, newval)
if unwrites is not None:
unwrites.append((dissoc, newnode))
elif oldval is not MISSING and oldkey is not MISSING: # key and value duplication across two different items
# {0: 1, 2: 3} | {0: 3} => {0: 3}
# n1, n2 => n1 (collapse n1 and n2 into n1)
# oldkey: 2, oldval: 1, oldnode: n2, newkey: 0, newval: 3, newnode: n1
if bykey:
oldnode = node_by_korv[oldkey]
newnode = node_by_korv[newkey]
else:
oldnode = node_by_korv[newval]
newnode = node_by_korv[oldval]
dissoc(oldnode)
assoc(newnode, newkey, newval)
if unwrites is not None:
unwrites.extend((
(assoc, newnode, newkey, oldval),
(assoc, oldnode, oldkey, newval),
(oldnode.relink,),
))
elif oldval is not MISSING: # just key duplication
# {0: 1, 2: 3} | {2: 4} => {0: 1, 2: 4}
# oldkey: MISSING, oldval: 3, newkey: 2, newval: 4
node = node_by_korv[newkey if bykey else oldval]
assoc(node, newkey, newval)
if unwrites is not None:
unwrites.append((assoc, node, newkey, oldval))
else:
assert oldkey is not MISSING # just value duplication
# {0: 1, 2: 3} | {4: 3} => {0: 1, 4: 3}
# oldkey: 2, oldval: MISSING, newkey: 4, newval: 3
node = node_by_korv[oldkey if bykey else newval]
assoc(node, newkey, newval)
if unwrites is not None:
unwrites.append((assoc, node, oldkey, newval))
def __iter__(self) -> t.Iterator[KT]:
"""Iterator over the contained keys in insertion order."""
return self._iter(reverse=False)
def __reversed__(self) -> t.Iterator[KT]:
"""Iterator over the contained keys in reverse insertion order."""
return self._iter(reverse=True)
def _iter(self, *, reverse: bool = False) -> t.Iterator[KT]:
nodes = self._sntl.iternodes(reverse=reverse)
korv_by_node = self._node_by_korv.inverse
if self._bykey:
for node in nodes:
yield korv_by_node[node]
else:
key_by_val = self._invm
for node in nodes:
val = korv_by_node[node]
yield key_by_val[val]
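# A quick sketch of the sentinel-based linked list above (illustrative only):
if __name__ == '__main__':
    sntl = SentinelNode()
    n1, n2 = sntl.new_last_node(), sntl.new_last_node()
    assert list(sntl.iternodes()) == [n1, n2]
    n1.unlink()
    assert list(sntl.iternodes()) == [n2]
    n1.relink()
    assert list(sntl.iternodes()) == [n1, n2]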
# * Code review nav *
# ============================================================================
# ← Prev: _bidict.py Current: _orderedbase.py Next: _orderedbidict.py →
# ============================================================================

View File

@ -0,0 +1,172 @@
# Copyright 2009-2024 Joshua Bronson. All rights reserved.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# * Code review nav *
# (see comments in __init__.py)
# ============================================================================
# ← Prev: _orderedbase.py Current: _orderedbidict.py <FIN>
# ============================================================================
"""Provide :class:`OrderedBidict`."""
from __future__ import annotations
import typing as t
from collections.abc import Set
from ._base import BidictKeysView
from ._bidict import MutableBidict
from ._orderedbase import OrderedBidictBase
from ._typing import KT
from ._typing import VT
class OrderedBidict(OrderedBidictBase[KT, VT], MutableBidict[KT, VT]):
"""Mutable bidict type that maintains items in insertion order."""
if t.TYPE_CHECKING:
@property
def inverse(self) -> OrderedBidict[VT, KT]: ...
@property
def inv(self) -> OrderedBidict[VT, KT]: ...
def clear(self) -> None:
"""Remove all items."""
super().clear()
self._node_by_korv.clear()
self._sntl.nxt = self._sntl.prv = self._sntl
def _pop(self, key: KT) -> VT:
val = super()._pop(key)
node = self._node_by_korv[key if self._bykey else val]
self._dissoc_node(node)
return val
def popitem(self, last: bool = True) -> tuple[KT, VT]:
"""*b.popitem() → (k, v)*
If *last* is true,
remove and return the most recently added item as a (key, value) pair.
Otherwise, remove and return the least recently added item.
:raises KeyError: if *b* is empty.
"""
if not self:
raise KeyError('OrderedBidict is empty')
node = getattr(self._sntl, 'prv' if last else 'nxt')
korv = self._node_by_korv.inverse[node]
if self._bykey:
return korv, self._pop(korv)
return self.inverse._pop(korv), korv
def move_to_end(self, key: KT, last: bool = True) -> None:
"""Move the item with the given key to the end if *last* is true, else to the beginning.
:raises KeyError: if *key* is missing
"""
korv = key if self._bykey else self._fwdm[key]
node = self._node_by_korv[korv]
node.prv.nxt = node.nxt
node.nxt.prv = node.prv
sntl = self._sntl
if last:
lastnode = sntl.prv
node.prv = lastnode
node.nxt = sntl
sntl.prv = lastnode.nxt = node
else:
firstnode = sntl.nxt
node.prv = sntl
node.nxt = firstnode
sntl.nxt = firstnode.prv = node
# Override the keys() and items() implementations inherited from BidictBase,
# which may delegate to the backing _fwdm dict, since this is a mutable ordered bidict,
# and therefore the ordering of items can get out of sync with the backing mappings
# after mutation. (Need not override values() because it delegates to .inverse.keys().)
def keys(self) -> t.KeysView[KT]:
"""A set-like object providing a view on the contained keys."""
return _OrderedBidictKeysView(self)
def items(self) -> t.ItemsView[KT, VT]:
"""A set-like object providing a view on the contained items."""
return _OrderedBidictItemsView(self)
# The following MappingView implementations use the __iter__ implementations
# inherited from their superclass counterparts in collections.abc, so they
# continue to yield items in the correct order even after an OrderedBidict
# is mutated. They also provide a __reversed__ implementation, which is not
# provided by the collections.abc superclasses.
class _OrderedBidictKeysView(BidictKeysView[KT]):
_mapping: OrderedBidict[KT, t.Any]
def __reversed__(self) -> t.Iterator[KT]:
return reversed(self._mapping)
class _OrderedBidictItemsView(t.ItemsView[KT, VT]):
_mapping: OrderedBidict[KT, VT]
def __reversed__(self) -> t.Iterator[tuple[KT, VT]]:
ob = self._mapping
for key in reversed(ob):
yield key, ob[key]
# For better performance, make _OrderedBidictKeysView and _OrderedBidictItemsView delegate
# to backing dicts for the methods they inherit from collections.abc.Set. (Cannot delegate
# for __iter__ and __reversed__ since they are order-sensitive.) See also: https://bugs.python.org/issue46713
_OView = t.Union[t.Type[_OrderedBidictKeysView[KT]], t.Type[_OrderedBidictItemsView[KT, t.Any]]]
_setmethodnames: t.Iterable[str] = (
'__lt__ __le__ __gt__ __ge__ __eq__ __ne__ __sub__ __rsub__ '
'__or__ __ror__ __xor__ __rxor__ __and__ __rand__ isdisjoint'
).split()
def _override_set_methods_to_use_backing_dict(cls: _OView[KT], viewname: str) -> None:
def make_proxy_method(methodname: str) -> t.Any:
def method(self: _OrderedBidictKeysView[KT] | _OrderedBidictItemsView[KT, t.Any], *args: t.Any) -> t.Any:
fwdm = self._mapping._fwdm
if not isinstance(fwdm, dict): # dict view speedup not available, fall back to Set's implementation.
return getattr(Set, methodname)(self, *args)
fwdm_dict_view = getattr(fwdm, viewname)()
fwdm_dict_view_method = getattr(fwdm_dict_view, methodname)
if (
len(args) != 1
or not isinstance((arg := args[0]), self.__class__)
or not isinstance(arg._mapping._fwdm, dict)
):
return fwdm_dict_view_method(*args)
# self and arg are both _OrderedBidictKeysViews or _OrderedBidictItemsViews whose bidicts are backed by
# a dict. Use arg's backing dict's corresponding view instead of arg. Otherwise, e.g. `ob1.keys()
# < ob2.keys()` would give "TypeError: '<' not supported between instances of '_OrderedBidictKeysView' and
# '_OrderedBidictKeysView'", because both `dict_keys(ob1).__lt__(ob2.keys()) is NotImplemented` and
# `dict_keys(ob2).__gt__(ob1.keys()) is NotImplemented`.
arg_dict = arg._mapping._fwdm
arg_dict_view = getattr(arg_dict, viewname)()
return fwdm_dict_view_method(arg_dict_view)
method.__name__ = methodname
method.__qualname__ = f'{cls.__qualname__}.{methodname}'
return method
for name in _setmethodnames:
setattr(cls, name, make_proxy_method(name))
_override_set_methods_to_use_backing_dict(_OrderedBidictKeysView, 'keys')
_override_set_methods_to_use_backing_dict(_OrderedBidictItemsView, 'items')
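# A quick sketch of the order-sensitive methods above (illustrative only):
if __name__ == '__main__':
    ob = OrderedBidict({'a': 1, 'b': 2, 'c': 3})
    ob.move_to_end('a')                        # order is now b, c, a
    assert list(ob) == ['b', 'c', 'a']
    assert ob.popitem() == ('a', 1)            # last=True by default
    assert ob.popitem(last=False) == ('b', 2)  # least recently added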
# * Code review nav *
# ============================================================================
# ← Prev: _orderedbase.py Current: _orderedbidict.py <FIN>
# ============================================================================

View File

@ -0,0 +1,49 @@
# Copyright 2009-2024 Joshua Bronson. All rights reserved.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""Provide typing-related objects."""
from __future__ import annotations
import typing as t
from enum import Enum
KT = t.TypeVar('KT')
VT = t.TypeVar('VT')
VT_co = t.TypeVar('VT_co', covariant=True)
Items = t.Iterable[t.Tuple[KT, VT]]
@t.runtime_checkable
class Maplike(t.Protocol[KT, VT_co]):
"""Like typeshed's SupportsKeysAndGetItem, but usable at runtime."""
def keys(self) -> t.Iterable[KT]: ...
def __getitem__(self, __key: KT) -> VT_co: ...
MapOrItems = t.Union[Maplike[KT, VT], Items[KT, VT]]
MappOrItems = t.Union[t.Mapping[KT, VT], Items[KT, VT]]
ItemsIter = t.Iterator[t.Tuple[KT, VT]]
class MissingT(Enum):
"""Sentinel used to represent none/missing when None itself can't be used."""
MISSING = 'MISSING'
MISSING: t.Final[t.Literal[MissingT.MISSING]] = MissingT.MISSING
OKT = t.Union[KT, MissingT] #: optional key type
OVT = t.Union[VT, MissingT] #: optional value type
DT = t.TypeVar('DT') #: for default arguments
ODT = t.Union[DT, MissingT] #: optional default arg type
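# Maplike is runtime-checkable, so any object exposing keys() and __getitem__
# passes an isinstance() check; a quick sketch (illustrative only):
if __name__ == '__main__':
    class KeysAndGetItem:
        def keys(self) -> tuple[str, ...]: return ('x',)
        def __getitem__(self, key: str) -> int: return 1
    assert isinstance(KeysAndGetItem(), Maplike)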

View File

@ -0,0 +1,14 @@
# Copyright 2009-2024 Joshua Bronson. All rights reserved.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""Define bidict package metadata."""
__version__ = '0.23.1'
__author__ = {'name': 'Joshua Bronson', 'email': 'jabronson@gmail.com'}
__copyright__ = '© 2009-2024 Joshua Bronson'
__description__ = 'The bidirectional mapping library for Python.'
__license__ = 'MPL 2.0'
__url__ = 'https://bidict.readthedocs.io'

View File

@ -0,0 +1 @@
PEP-561 marker.

View File

@ -0,0 +1,78 @@
Metadata-Version: 2.4
Name: certifi
Version: 2025.10.5
Summary: Python package for providing Mozilla's CA Bundle.
Home-page: https://github.com/certifi/python-certifi
Author: Kenneth Reitz
Author-email: me@kennethreitz.com
License: MPL-2.0
Project-URL: Source, https://github.com/certifi/python-certifi
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
Classifier: Natural Language :: English
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Requires-Python: >=3.7
License-File: LICENSE
Dynamic: author
Dynamic: author-email
Dynamic: classifier
Dynamic: description
Dynamic: home-page
Dynamic: license
Dynamic: license-file
Dynamic: project-url
Dynamic: requires-python
Dynamic: summary
Certifi: Python SSL Certificates
================================
Certifi provides Mozilla's carefully curated collection of Root Certificates for
validating the trustworthiness of SSL certificates while verifying the identity
of TLS hosts. It has been extracted from the `Requests`_ project.
Installation
------------
``certifi`` is available on PyPI. Simply install it with ``pip``::
$ pip install certifi
Usage
-----
To reference the installed certificate authority (CA) bundle, you can use the
built-in function::
>>> import certifi
>>> certifi.where()
'/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'
Or from the command line::
$ python -m certifi
/usr/local/lib/python3.7/site-packages/certifi/cacert.pem
Enjoy!
.. _`Requests`: https://requests.readthedocs.io/en/master/
Addition/Removal of Certificates
--------------------------------
Certifi does not support any addition/removal or other modification of the
CA trust store content. This project is intended to provide a reliable and
highly portable root of trust to python deployments. Look to upstream projects
for methods to use alternate trust.

View File

@ -0,0 +1,14 @@
certifi-2025.10.5.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
certifi-2025.10.5.dist-info/METADATA,sha256=RzyR4sT6xRN1pNNy24IHVOlZuDJh1BNfaMa04zEadtk,2474
certifi-2025.10.5.dist-info/RECORD,,
certifi-2025.10.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
certifi-2025.10.5.dist-info/licenses/LICENSE,sha256=6TcW2mucDVpKHfYP5pWzcPBpVgPSH2-D8FPkLPwQyvc,989
certifi-2025.10.5.dist-info/top_level.txt,sha256=KMu4vUCfsjLrkPbSNdgdekS-pVJzBAJFO__nI8NF6-U,8
certifi/__init__.py,sha256=jWkaYHMk4oIPSSBEK5bLMbO_qrkyNm_cRFx-D16-3Ks,94
certifi/__main__.py,sha256=xBBoj905TUWBLRGANOcf7oi6e-3dMP4cEoG9OyMs11g,243
certifi/__pycache__/__init__.cpython-312.pyc,,
certifi/__pycache__/__main__.cpython-312.pyc,,
certifi/__pycache__/core.cpython-312.pyc,,
certifi/cacert.pem,sha256=IIn8WiWDZAH67pn3IkYLAbOTmZdGoPuBeUNmbW7MBFg,291366
certifi/core.py,sha256=XFXycndG5pf37ayeF8N32HUuDafsyhkVMbO4BAPWHa0,3394
certifi/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

View File

@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: setuptools (80.9.0)
Root-Is-Purelib: true
Tag: py3-none-any

View File

@ -0,0 +1,20 @@
This package contains a modified version of ca-bundle.crt:
ca-bundle.crt -- Bundle of CA Root Certificates
This is a bundle of X.509 certificates of public Certificate Authorities
(CA). These were automatically extracted from Mozilla's root certificates
file (certdata.txt). This file can be found in the mozilla source tree:
https://hg.mozilla.org/mozilla-central/file/tip/security/nss/lib/ckfw/builtins/certdata.txt
It contains the certificates in PEM format and therefore
can be directly used with curl / libcurl / php_curl, or with
an Apache+mod_ssl webserver for SSL client authentication.
Just configure this file as the SSLCACertificateFile.#
***** BEGIN LICENSE BLOCK *****
This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain
one at http://mozilla.org/MPL/2.0/.
***** END LICENSE BLOCK *****
@(#) $RCSfile: certdata.txt,v $ $Revision: 1.80 $ $Date: 2011/11/03 15:11:58 $

View File

@ -0,0 +1,4 @@
from .core import contents, where
__all__ = ["contents", "where"]
__version__ = "2025.10.05"

View File

@ -0,0 +1,12 @@
import argparse
from certifi import contents, where
parser = argparse.ArgumentParser()
parser.add_argument("-c", "--contents", action="store_true")
args = parser.parse_args()
if args.contents:
print(contents())
else:
print(where())
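# Typical invocations (illustrative):
#   $ python -m certifi              # prints the bundle path
#   $ python -m certifi --contents   # prints the PEM contents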

File diff suppressed because it is too large

View File

@ -0,0 +1,83 @@
"""
certifi.py
~~~~~~~~~~
This module returns the installation location of cacert.pem or its contents.
"""
import sys
import atexit
def exit_cacert_ctx() -> None:
_CACERT_CTX.__exit__(None, None, None) # type: ignore[union-attr]
if sys.version_info >= (3, 11):
from importlib.resources import as_file, files
_CACERT_CTX = None
_CACERT_PATH = None
def where() -> str:
# This is slightly terrible, but we want to delay extracting the file
# in cases where we're inside of a zipimport situation until someone
# actually calls where(), but we don't want to re-extract the file
# on every call of where(), so we'll do it once then store it in a
# global variable.
global _CACERT_CTX
global _CACERT_PATH
if _CACERT_PATH is None:
# This is slightly janky, the importlib.resources API wants you to
# manage the cleanup of this file, so it doesn't actually return a
# path, it returns a context manager that will give you the path
# when you enter it and will do any cleanup when you leave it. In
# the common case of not needing a temporary file, it will just
# return the file system location and the __exit__() is a no-op.
#
# We also have to hold onto the actual context manager, because
# it will do the cleanup whenever it gets garbage collected, so
# we will also store that at the global level as well.
_CACERT_CTX = as_file(files("certifi").joinpath("cacert.pem"))
_CACERT_PATH = str(_CACERT_CTX.__enter__())
atexit.register(exit_cacert_ctx)
return _CACERT_PATH
def contents() -> str:
return files("certifi").joinpath("cacert.pem").read_text(encoding="ascii")
else:
from importlib.resources import path as get_path, read_text
_CACERT_CTX = None
_CACERT_PATH = None
def where() -> str:
# This is slightly terrible, but we want to delay extracting the
# file in cases where we're inside of a zipimport situation until
# someone actually calls where(), but we don't want to re-extract
# the file on every call of where(), so we'll do it once then store
# it in a global variable.
global _CACERT_CTX
global _CACERT_PATH
if _CACERT_PATH is None:
# This is slightly janky, the importlib.resources API wants you
# to manage the cleanup of this file, so it doesn't actually
# return a path, it returns a context manager that will give
# you the path when you enter it and will do any cleanup when
# you leave it. In the common case of not needing a temporary
# file, it will just return the file system location and the
# __exit__() is a no-op.
#
# We also have to hold onto the actual context manager, because
# it will do the cleanup whenever it gets garbage collected, so
# we will also store that at the global level as well.
_CACERT_CTX = get_path("certifi", "cacert.pem")
_CACERT_PATH = str(_CACERT_CTX.__enter__())
atexit.register(exit_cacert_ctx)
return _CACERT_PATH
def contents() -> str:
return read_text("certifi", "cacert.pem", encoding="ascii")
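# A quick sketch of a typical consumer of where() (illustrative only):
if __name__ == "__main__":
    import ssl
    ctx = ssl.create_default_context(cafile=where())  # verify TLS against Mozilla's bundle
    print(where())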

View File

@ -0,0 +1,764 @@
Metadata-Version: 2.4
Name: charset-normalizer
Version: 3.4.4
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
License: MIT
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
Project-URL: Code, https://github.com/jawah/charset_normalizer
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Topic :: Utilities
Classifier: Typing :: Typed
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: unicode-backport
Dynamic: license-file
<h1 align="center">Charset Detection, for Everyone 👋</h1>
<p align="center">
<sup>The Real First Universal Charset Detector</sup><br>
<a href="https://pypi.org/project/charset-normalizer">
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
</a>
<a href="https://pepy.tech/project/charset-normalizer/">
<img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
</a>
<a href="https://bestpractices.coreinfrastructure.org/projects/7297">
<img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
</a>
</p>
<p align="center">
<sup><i>Featured Packages</i></sup><br>
<a href="https://github.com/jawah/niquests">
<img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
</a>
<a href="https://github.com/jawah/wassima">
<img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
</a>
</p>
<p align="center">
<sup><i>In other language (unofficial port - by the community)</i></sup><br>
<a href="https://github.com/nickspring/charset-normalizer-rs">
<img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
</a>
</p>
> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
> I'm trying to resolve the issue by taking a new approach.
> All IANA character set names for which the Python core library provides codecs are supported.
<p align="center">
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
</p>
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
| `Fast` | ❌ | ✅ | ✅ |
| `Universal**` | ❌ | ✅ | ❌ |
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
| `Native Python` | ✅ | ✅ | ❌ |
| `Detect spoken language` | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
<p align="center">
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
</p>
*\*\* : They clearly use specific code for a specific encoding, even if it covers most of the encodings in use*<br>
## ⚡ Performance
This package offers better performance than its counterpart, Chardet. Here are some numbers.
| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec |
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
| Package | 99th percentile | 95th percentile | 50th percentile |
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms |
| charset-normalizer | 100 ms | 50 ms | 5 ms |
_updated as of December 2024 using CPython 3.12_
Chardet's performance on larger files (1 MB+) is very poor. Expect a huge difference on large payloads.
> Stats are generated using 400+ files with default parameters. For more details on the files used, see the GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays depend heavily on your CPU capabilities; the factors should remain the same.
> Keep in mind that the stats are generous and that Chardet's accuracy versus ours is measured using Chardet's initial capabilities
> (e.g. supported encodings). Challenge them if you want.
## ✨ Installation
Using pip:
```sh
pip install charset-normalizer -U
```
## 🚀 Basic Usage
### CLI
This package comes with a CLI.
```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
file [file ...]
The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.
positional arguments:
files File(s) to be analysed
optional arguments:
-h, --help show this help message and exit
-v, --verbose Display complementary information about file if any.
Stdout will contain logs about the detection process.
-a, --with-alternative
Output complementary possibilities if any. Top-level
JSON WILL be a list.
-n, --normalize Permit to normalize input file. If not set, program
does not write anything.
-m, --minimal Only output the charset detected to STDOUT. Disabling
JSON output.
-r, --replace Replace file when trying to normalize it instead of
creating a new one.
-f, --force Replace file without asking if you are sure, use this
flag with caution.
-t THRESHOLD, --threshold THRESHOLD
Define a custom maximum amount of chaos allowed in
decoded content. 0. <= chaos <= 1.
--version Show version information and exit.
```
```bash
normalizer ./data/sample.1.fr.srt
```
or
```bash
python -m charset_normalizer ./data/sample.1.fr.srt
```
🎉 Since version 1.4.0, the CLI produces an easily usable stdout result in JSON format.
```json
{
"path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
"encoding": "cp1252",
"encoding_aliases": [
"1252",
"windows_1252"
],
"alternative_encodings": [
"cp1254",
"cp1256",
"cp1258",
"iso8859_14",
"iso8859_15",
"iso8859_16",
"iso8859_3",
"iso8859_9",
"latin_1",
"mbcs"
],
"language": "French",
"alphabets": [
"Basic Latin",
"Latin-1 Supplement"
],
"has_sig_or_bom": false,
"chaos": 0.149,
"coherence": 97.152,
"unicode_path": null,
"is_preferred": true
}
```
### Python
*Just print out normalized text*
```python
from charset_normalizer import from_path
results = from_path('./my_subtitle.srt')
print(str(results.best()))
```
*Upgrade your code without effort*
```python
from charset_normalizer import detect
```
The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) backward-compatible result possible.
See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
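For instance, a minimal sketch combining both APIs (the sample byte string below is just an illustration):
```python
from charset_normalizer import detect, from_bytes

payload = "Bonjour, où êtes-vous ?".encode("cp1252")

best = from_bytes(payload).best()    # a CharsetMatch, or None if nothing fit
if best is not None:
    print(best.encoding, str(best))  # best guess (possibly a cp1252-compatible alias)

print(detect(payload))  # chardet-style dict: encoding / language / confidence
```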
## 😇 Why
When I started using Chardet, I noticed that it did not meet my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also! I never back down from a good challenge!
I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered strings.**
What I want is to get readable text, the best I can.
In a way, **I'm brute-forcing text decoding.** How cool is that? 😎
Don't confuse the **ftfy** package with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer's is to convert a raw file in an unknown encoding to Unicode.
## 🍰 How
- Discard all charset encoding tables that could not fit the binary content.
- Measure the noise, or the mess, once the content is opened (in chunks) with a corresponding charset encoding.
- Extract matches with the lowest mess detected.
- Additionally, we measure coherence / probe for a language.
**Wait a minute**, what are noise/mess and coherence according to **YOU?**
*Noise:* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka defining noise in rendered text).
I know that my interpretation of what is noise is probably incomplete; feel free to contribute in order to
improve or rewrite it.
*Coherence:* For each language on Earth, we have computed ranked letter-occurrence frequencies (the best we can). I thought
that intel was worth something here, so I use those records against the decoded text to check whether I can detect intelligent design.
## ⚡ Known limitations
- Language detection is unreliable when the text contains two or more languages sharing identical letters (e.g. HTML with English tags + Turkish content, both sharing Latin characters).
- Every charset detector depends heavily on having sufficient content. In common cases, do not bother running detection on very tiny content.
## ⚠️ About Python EOLs
**If you are running:**
- Python >=2.7,<3.5: Unsupported
- Python 3.5: charset-normalizer < 2.1
- Python 3.6: charset-normalizer < 3.1
- Python 3.7: charset-normalizer < 4.0
Upgrade your Python interpreter as soon as possible.
## 👤 Contributing
Contributions, issues and feature requests are very much welcome.<br />
Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
## 📝 License
Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
## 💼 For Enterprise
Professional support for charset-normalizer is available as part of the [Tidelift
Subscription][1]. Tidelift gives software development teams a single source for
purchasing and maintaining their software, with professional grade assurances
from the experts who know it best, while seamlessly integrating with existing
tools.
[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)
# Changelog
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13)
### Changed
- Bound `setuptools` to a specific constraint `setuptools>=68,<=81`.
- Raised upper bound of mypyc for the optional pre-built extension to v1.18.2
### Removed
- `setuptools-scm` as a build dependency.
### Misc
- Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes.
- Additional pre-built wheels for riscv64, s390x, and armv7l architectures.
- Restore `multiple.intoto.jsonl` in GitHub releases in addition to an individual attestation file per wheel.
## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09)
### Changed
- mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583)
- automatically lower the confidence on small byte samples that are not Unicode in the legacy `detect` output function. (#391)
### Added
- Custom build backend to overcome inability to mark mypy as an optional dependency in the build phase.
- Support for Python 3.14
### Fixed
- sdist archive contained useless directories.
- automatically fall back on valid UTF-16 or UTF-32 even if the md says it's noisy. (#633)
### Misc
- SBOMs are automatically published to the relevant GitHub release to comply with regulatory changes.
Each published wheel comes with its SBOM. We chose CycloneDX as the format.
- Prebuilt optimized wheels are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel.
## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02)
### Fixed
- Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591)
- Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587)
### Changed
- Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8
## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)
### Changed
- Project metadata are now stored using `pyproject.toml` instead of `setup.cfg` using setuptools as the build backend.
- Enforce delayed annotation loading for simpler and more consistent types in the project.
- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8
### Added
- pre-commit configuration.
- noxfile.
### Removed
- `build-requirements.txt` as per using `pyproject.toml` native build configuration.
- `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile).
- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
- Unused `utils.range_scan` function.
### Fixed
- Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. (#572)
- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
### Added
- Argument `--no-preemptive` in the CLI to prevent the detector from searching for hints.
- Support for Python 3.13 (#512)
### Fixed
- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything other than a CharsetMatch.
- Improved the general reliability of the detector based on user feedback. (#520) (#509) (#498) (#407) (#537)
- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
### Fixed
- Unintentional memory usage regression when using large payloads that match several encodings (#376)
- Regression on some detection case showcased in the documentation (#371)
### Added
- Noise (md) probe that identifies malformed Arabic representations due to the presence of letters in isolated form (credit to my wife)
## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
### Changed
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
- Improved the general detection reliability based on reports from the community
## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
### Added
- Allow executing the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
- Support for 9 forgotten encodings that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)
### Removed
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
### Changed
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
### Fixed
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
### Changed
- Typehint for function `from_path` no longer enforces `PathLike` as its first argument
- Minor improvement over the global detection reliability
### Added
- Introduce function `is_binary` that relies on main capabilities, and is optimized to detect binaries
- Propagate the `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp`, allowing deeper control over the detection (default True)
- Explicit support for Python 3.12
### Fixed
- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)
## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
### Added
- Argument `should_rename_legacy` for the legacy function `detect`; any new arguments are disregarded without errors (PR #262)
### Removed
- Support for Python 3.6 (PR #260)
### Changed
- Optional speedup provided by mypy/c 1.0.1
## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
### Fixed
- Multi-byte cutter/chunk generator did not always cut correctly (PR #233)
### Changed
- Speedup provided by mypy/c 0.990 on Python >= 3.7
## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min. one); will log the details of the Mess-detector results
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
- `normalizer --version` now specifies whether the current version provides an extra speedup (meaning a mypyc-compiled wheel)
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
### Fixed
- CLI with opt --normalize fails when using a full path for files
- TooManyAccentuatedPlugin induces false positives on the mess detection when too few alpha characters have been fed to it
- Sphinx warnings when generating the documentation
### Removed
- Coherence detector no longer returns 'Simple English'; it returns 'English' instead
- Coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead
- Breaking: Methods `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable and conflicts with ASCII)
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min. one); will log the details of the Mess-detector results
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
### Fixed
- CLI with opt --normalize fails when using a full path for files
- TooManyAccentuatedPlugin induces false positives on the mess detection when too few alpha characters have been fed to it
### Removed
- Coherence detector no longer returns 'Simple English'; it returns 'English' instead
- Coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead
## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
### Added
- `normalizer --version` now specifies whether the current version provides an extra speedup (meaning a mypyc-compiled wheel)
### Removed
- Breaking: Methods `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable and conflicts with ASCII)
### Fixed
- Sphinx warnings when generating the documentation
## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
### Changed
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
### Removed
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
### Deprecated
- Function `normalize` scheduled for removal in 3.0
### Changed
- Removed useless call to decode in fn is_unprintable (#206)
### Fixed
- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
### Added
- Output the Unicode table version when running the CLI with `--version` (PR #194)
### Changed
- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
### Fixed
- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
### Removed
- Support for Python 3.5 (PR #192)
### Deprecated
- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
### Fixed
- ASCII mis-detection in rare cases (PR #170)
## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
### Added
- Explicit support for Python 3.11 (PR #164)
### Changed
- The logging behavior has been completely reviewed; it now uses only TRACE and DEBUG levels (PR #163 #165)
## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
### Fixed
- Fallback match entries might lead to UnicodeDecodeError for large byte sequences (PR #154)
### Changed
- Skipping the language-detection (CD) on ASCII (PR #155)
## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
### Changed
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
### Fixed
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)
## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
### Changed
- Improvement over Vietnamese detection (PR #126)
- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
- Code style as refactored by Sourcery-AI (PR #131)
- Minor adjustment on the MD around european words (PR #133)
- Remove and replace SRTs from assets / tests (PR #139)
- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)
### Fixed
- Fix large (misleading) sequences giving UnicodeDecodeError (PR #137)
- Avoid using too-insignificant chunks (PR #137)
### Added
- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
### Added
- Add support for Kazakh (Cyrillic) language detection (PR #109)
### Changed
- Further improve inferring the language from a given single-byte code page (PR #112)
- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
- Various detection improvement (MD+CD) (PR #117)
### Removed
- Remove redundant logging entry about detected language(s) (PR #115)
### Fixed
- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
### Fixed
- Unforeseen regression with the loss of backward compatibility with some older minor versions of Python 3.5.x (PR #100)
- Fix CLI crash when using --minimal output in certain cases (PR #103)
### Changed
- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
### Changed
- The project now complies with flake8, mypy, isort and black to ensure better overall quality (PR #81)
- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)
- The Unicode detection is slightly improved (PR #93)
- Add syntax sugar \_\_bool\_\_ for results CharsetMatches list-container (PR #91)
### Removed
- The project no longer raises a warning on tiny content given for detection; it is simply logged as a warning instead (PR #92)
### Fixed
- In some rare cases, the chunk extractor could cut in the middle of a multi-byte character and mislead the mess detection (PR #95)
- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
- The MANIFEST.in was not exhaustive (PR #78)
## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
### Fixed
- The CLI no longer raises an unexpected exception when no encoding has been found (PR #70)
- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
- Submatch factoring could be wrong in rare edge cases (PR #72)
- Multiple files given to the CLI were ignored when publishing results to STDOUT (after the first path) (PR #72)
- Fix line endings from CRLF to LF for certain project files (PR #67)
### Changed
- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
- Allow fallback on specified encoding if any (PR #71)
## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
### Changed
- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results, especially for ASCII. (PR #63)
- In accordance with community wishes, the detection will fall back on ASCII or UTF-8 as a last resort. (PR #64)
## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
### Fixed
- Fixed mis-detection of empty/too-small JSON payloads. Report from [@tseaver](https://github.com/tseaver) (PR #59)
### Changed
- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)
## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
### Fixed
- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
- Using explain=False permanently disabled the verbose output in the current runtime (PR #47)
- One log entry (language target preemptive) was not shown in logs when using explain=True (PR #47)
- Fix an undesired exception (ValueError) on getitem of a CharsetMatches instance (PR #52)
### Changed
- The public function normalize's default argument values were not aligned with those of from_bytes (PR #53)
### Added
- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
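A sketch of the relaxed arguments (the alias spellings "latin1" and "u8" are illustrative Python codec aliases):

```python
from charset_normalizer import from_bytes

# Aliases are now accepted in addition to the canonical names
# ("iso8859-1", "utf_8").
results = from_bytes("déjà vu".encode("latin_1"), cp_isolation=["latin1", "u8"])
```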
## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
### Changed
- 4 to 5 times faster than the previous 1.4.0 release; at least 2x faster than Chardet.
- Emphasis has been put on UTF-8 detection; it should perform nearly instantaneously.
- The backward compatibility with Chardet has been greatly improved; the legacy detect function returns an identical charset name whenever possible (see the sketch after this list).
- The detection mechanism has been slightly improved; Turkish content is now detected correctly (most of the time).
- The program has been rewritten to ease readability and maintainability (now using static typing).
- utf_7 detection has been reinstated.
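A sketch of the Chardet-compatible entry point; the dict-shaped result (`encoding`, `language`, `confidence` keys) is assumed from Chardet's interface:

```python
from charset_normalizer import detect

# Drop-in replacement for chardet.detect.
print(detect("Bonjour tout le monde".encode("utf_8_sig")))
```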
### Removed
- This package no longer requires anything when used with Python 3.5 (dropped cached_property).
- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbo-Croatian.
- The exception hook on UnicodeDecodeError has been removed.
### Deprecated
- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
### Fixed
- The CLI output used the relative path of the file(s); it should be absolute.
## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
### Fixed
- Logger configuration/usage no longer conflict with others (PR #44)
## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
### Removed
- Now using standard logging instead of the loguru package.
- Dropped the nose test framework in favor of the maintained pytest.
- Chose not to use the dragonmapper package to help with gibberish Chinese/CJK text.
- cached_property is required only for Python 3.5, due to a constraint; dropped for every other interpreter version.
- Stopped supporting UTF-7 that does not contain a SIG.
- Dropped PrettyTable; replaced with pure JSON output in the CLI.
### Fixed
- The BOM marker in a CharsetNormalizerMatch instance could be False in rare cases, even if obviously present, due to the sub-match factoring process.
- The BOM was not searched for properly when trying the utf_32/utf_16 parent codec.
### Changed
- Improved the package's final size by compressing frequencies.json.
- Huge improvement on the largest payloads.
### Added
- The CLI now produces JSON-consumable output.
- Return ASCII if the given sequence fits, with reasonable confidence.
## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
### Fixed
- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)
## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
### Fixed
- An empty payload given for detection could cause an exception when trying to access the `alphabets` property. (PR #39)
## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
### Fixed
- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)
## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
### Changed
- Amend the previous release to allow prettytable 2.0 (PR #35)
## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
### Fixed
- Fix an error while using the package with a Python pre-release interpreter (PR #33)
### Changed
- Dependencies refactoring, constraints revised.
### Added
- Add Python 3.9 and 3.10 to the supported interpreters
MIT License
Copyright (c) 2025 TAHRI Ahmed R.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,35 @@
../../../bin/normalizer,sha256=Jt4MK7XjOjSbMDL0OGi5k_9R59XZFpgc2Gqlgf4GbGk,271
charset_normalizer-3.4.4.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
charset_normalizer-3.4.4.dist-info/METADATA,sha256=jVuUFBti8dav19YLvWissTihVdF2ozUY4KKMw7jdkBQ,37303
charset_normalizer-3.4.4.dist-info/RECORD,,
charset_normalizer-3.4.4.dist-info/WHEEL,sha256=DxRnWQz-Kp9-4a4hdDHsSv0KUC3H7sN9Nbef3-8RjXU,190
charset_normalizer-3.4.4.dist-info/entry_points.txt,sha256=ADSTKrkXZ3hhdOVFi6DcUEHQRS0xfxDIE_pEz4wLIXA,65
charset_normalizer-3.4.4.dist-info/licenses/LICENSE,sha256=bQ1Bv-FwrGx9wkjJpj4lTQ-0WmDVCoJX0K-SxuJJuIc,1071
charset_normalizer-3.4.4.dist-info/top_level.txt,sha256=7ASyzePr8_xuZWJsnqJjIBtyV8vhEo0wBCv1MPRRi3Q,19
charset_normalizer/__init__.py,sha256=OKRxRv2Zhnqk00tqkN0c1BtJjm165fWXLydE52IKuHc,1590
charset_normalizer/__main__.py,sha256=yzYxMR-IhKRHYwcSlavEv8oGdwxsR89mr2X09qXGdps,109
charset_normalizer/__pycache__/__init__.cpython-312.pyc,,
charset_normalizer/__pycache__/__main__.cpython-312.pyc,,
charset_normalizer/__pycache__/api.cpython-312.pyc,,
charset_normalizer/__pycache__/cd.cpython-312.pyc,,
charset_normalizer/__pycache__/constant.cpython-312.pyc,,
charset_normalizer/__pycache__/legacy.cpython-312.pyc,,
charset_normalizer/__pycache__/md.cpython-312.pyc,,
charset_normalizer/__pycache__/models.cpython-312.pyc,,
charset_normalizer/__pycache__/utils.cpython-312.pyc,,
charset_normalizer/__pycache__/version.cpython-312.pyc,,
charset_normalizer/api.py,sha256=V07i8aVeCD8T2fSia3C-fn0i9t8qQguEBhsqszg32Ns,22668
charset_normalizer/cd.py,sha256=WKTo1HDb-H9HfCDc3Bfwq5jzS25Ziy9SE2a74SgTq88,12522
charset_normalizer/cli/__init__.py,sha256=D8I86lFk2-py45JvqxniTirSj_sFyE6sjaY_0-G1shc,136
charset_normalizer/cli/__main__.py,sha256=dMaXG6IJXRvqq8z2tig7Qb83-BpWTln55ooiku5_uvg,12646
charset_normalizer/cli/__pycache__/__init__.cpython-312.pyc,,
charset_normalizer/cli/__pycache__/__main__.cpython-312.pyc,,
charset_normalizer/constant.py,sha256=7UVY4ldYhmQMHUdgQ_sgZmzcQ0xxYxpBunqSZ-XJZ8U,42713
charset_normalizer/legacy.py,sha256=sYBzSpzsRrg_wF4LP536pG64BItw7Tqtc3SMQAHvFLM,2731
charset_normalizer/md.cpython-312-x86_64-linux-gnu.so,sha256=sZ7umtJLjKfA83NFJ7npkiDyr06zDT8cWtl6uIx2MsM,15912
charset_normalizer/md.py,sha256=-_oN3h3_X99nkFfqamD3yu45DC_wfk5odH0Tr_CQiXs,20145
charset_normalizer/md__mypyc.cpython-312-x86_64-linux-gnu.so,sha256=J2WWgLBQiO8sqdFsENp9u5V9uEH0tTwvTLszPdqhsv0,290584
charset_normalizer/models.py,sha256=lKXhOnIPtiakbK3i__J9wpOfzx3JDTKj7Dn3Rg0VaRI,12394
charset_normalizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
charset_normalizer/utils.py,sha256=sTejPgrdlNsKNucZfJCxJ95lMTLA0ShHLLE3n5wpT9Q,12170
charset_normalizer/version.py,sha256=nKE4qBNk5WA4LIJ_yIH_aSDfvtsyizkWMg-PUG-UZVk,115

View File

@ -0,0 +1,7 @@
Wheel-Version: 1.0
Generator: setuptools (80.9.0)
Root-Is-Purelib: false
Tag: cp312-cp312-manylinux_2_17_x86_64
Tag: cp312-cp312-manylinux2014_x86_64
Tag: cp312-cp312-manylinux_2_28_x86_64

View File

@ -0,0 +1,2 @@
[console_scripts]
normalizer = charset_normalizer.cli:cli_detect

View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2025 TAHRI Ahmed R.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1 @@
charset_normalizer

View File

@ -0,0 +1,48 @@
"""
Charset-Normalizer
~~~~~~~~~~~~~~
The Real First Universal Charset Detector.
A library that helps you read text from an unknown charset encoding.
Motivated by chardet, this package tries to resolve the issue by taking a new approach.
All IANA character set names for which the Python core library provides codecs are supported.
Basic usage:
>>> from charset_normalizer import from_bytes
>>> results = from_bytes('Bсеки човек има право на образование. Oбразованието!'.encode('utf_8'))
>>> best_guess = results.best()
>>> str(best_guess)
'Bсеки човек има право на образование. Oбразованието!'
Other methods and usages are available - see the full documentation
at <https://github.com/Ousret/charset_normalizer>.
:copyright: (c) 2021 by Ahmed TAHRI
:license: MIT, see LICENSE for more details.
"""
from __future__ import annotations
import logging
from .api import from_bytes, from_fp, from_path, is_binary
from .legacy import detect
from .models import CharsetMatch, CharsetMatches
from .utils import set_logging_handler
from .version import VERSION, __version__
__all__ = (
"from_fp",
"from_path",
"from_bytes",
"is_binary",
"detect",
"CharsetMatch",
"CharsetMatches",
"__version__",
"VERSION",
"set_logging_handler",
)
# Attach a NullHandler to the top level logger by default
# https://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library
logging.getLogger("charset_normalizer").addHandler(logging.NullHandler())

View File

@ -0,0 +1,6 @@
from __future__ import annotations
from .cli import cli_detect
if __name__ == "__main__":
cli_detect()

View File

@ -0,0 +1,669 @@
from __future__ import annotations
import logging
from os import PathLike
from typing import BinaryIO
from .cd import (
coherence_ratio,
encoding_languages,
mb_encoding_languages,
merge_coherence_ratios,
)
from .constant import IANA_SUPPORTED, TOO_BIG_SEQUENCE, TOO_SMALL_SEQUENCE, TRACE
from .md import mess_ratio
from .models import CharsetMatch, CharsetMatches
from .utils import (
any_specified_encoding,
cut_sequence_chunks,
iana_name,
identify_sig_or_bom,
is_cp_similar,
is_multi_byte_encoding,
should_strip_sig_or_bom,
)
logger = logging.getLogger("charset_normalizer")
explain_handler = logging.StreamHandler()
explain_handler.setFormatter(
logging.Formatter("%(asctime)s | %(levelname)s | %(message)s")
)
def from_bytes(
sequences: bytes | bytearray,
steps: int = 5,
chunk_size: int = 512,
threshold: float = 0.2,
cp_isolation: list[str] | None = None,
cp_exclusion: list[str] | None = None,
preemptive_behaviour: bool = True,
explain: bool = False,
language_threshold: float = 0.1,
enable_fallback: bool = True,
) -> CharsetMatches:
"""
Given a raw bytes sequence, return the best possible charsets usable to render str objects.
If there are no results, it is a strong indicator that the source is binary/not text.
By default, the process will extract 5 blocks of 512 bytes each to assess the mess and coherence of a given sequence,
and will give up on a particular code page after 20% of measured mess. Those criteria are customizable at will.
The preemptive behaviour DOES NOT replace the traditional detection workflow; it prioritizes a particular code page
but never takes it for granted. It can improve performance.
You may want to focus your attention on some code pages and/or exclude others; use cp_isolation and cp_exclusion for that
purpose.
This function will strip the SIG from the payload/sequence every time, except for UTF-16 and UTF-32.
By default the library does not set up any handler other than the NullHandler. If you set the 'explain'
toggle to True, it will alter the logger configuration to add a StreamHandler suitable for debugging.
Custom logging format and handler can be set manually.
"""
if not isinstance(sequences, (bytearray, bytes)):
raise TypeError(
"Expected object of type bytes or bytearray, got: {}".format(
type(sequences)
)
)
if explain:
previous_logger_level: int = logger.level
logger.addHandler(explain_handler)
logger.setLevel(TRACE)
length: int = len(sequences)
if length == 0:
logger.debug("Encoding detection on empty bytes, assuming utf_8 intention.")
if explain: # Defensive: ensure exit path clean handler
logger.removeHandler(explain_handler)
logger.setLevel(previous_logger_level or logging.WARNING)
return CharsetMatches([CharsetMatch(sequences, "utf_8", 0.0, False, [], "")])
if cp_isolation is not None:
logger.log(
TRACE,
"cp_isolation is set. use this flag for debugging purpose. "
"limited list of encoding allowed : %s.",
", ".join(cp_isolation),
)
cp_isolation = [iana_name(cp, False) for cp in cp_isolation]
else:
cp_isolation = []
if cp_exclusion is not None:
logger.log(
TRACE,
"cp_exclusion is set. use this flag for debugging purpose. "
"limited list of encoding excluded : %s.",
", ".join(cp_exclusion),
)
cp_exclusion = [iana_name(cp, False) for cp in cp_exclusion]
else:
cp_exclusion = []
if length <= (chunk_size * steps):
logger.log(
TRACE,
"override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.",
steps,
chunk_size,
length,
)
steps = 1
chunk_size = length
if steps > 1 and length / steps < chunk_size:
chunk_size = int(length / steps)
is_too_small_sequence: bool = len(sequences) < TOO_SMALL_SEQUENCE
is_too_large_sequence: bool = len(sequences) >= TOO_BIG_SEQUENCE
if is_too_small_sequence:
logger.log(
TRACE,
"Trying to detect encoding from a tiny portion of ({}) byte(s).".format(
length
),
)
elif is_too_large_sequence:
logger.log(
TRACE,
"Using lazy str decoding because the payload is quite large, ({}) byte(s).".format(
length
),
)
prioritized_encodings: list[str] = []
specified_encoding: str | None = (
any_specified_encoding(sequences) if preemptive_behaviour else None
)
if specified_encoding is not None:
prioritized_encodings.append(specified_encoding)
logger.log(
TRACE,
"Detected declarative mark in sequence. Priority +1 given for %s.",
specified_encoding,
)
tested: set[str] = set()
tested_but_hard_failure: list[str] = []
tested_but_soft_failure: list[str] = []
fallback_ascii: CharsetMatch | None = None
fallback_u8: CharsetMatch | None = None
fallback_specified: CharsetMatch | None = None
results: CharsetMatches = CharsetMatches()
early_stop_results: CharsetMatches = CharsetMatches()
sig_encoding, sig_payload = identify_sig_or_bom(sequences)
if sig_encoding is not None:
prioritized_encodings.append(sig_encoding)
logger.log(
TRACE,
"Detected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.",
len(sig_payload),
sig_encoding,
)
prioritized_encodings.append("ascii")
if "utf_8" not in prioritized_encodings:
prioritized_encodings.append("utf_8")
for encoding_iana in prioritized_encodings + IANA_SUPPORTED:
if cp_isolation and encoding_iana not in cp_isolation:
continue
if cp_exclusion and encoding_iana in cp_exclusion:
continue
if encoding_iana in tested:
continue
tested.add(encoding_iana)
decoded_payload: str | None = None
bom_or_sig_available: bool = sig_encoding == encoding_iana
strip_sig_or_bom: bool = bom_or_sig_available and should_strip_sig_or_bom(
encoding_iana
)
if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available:
logger.log(
TRACE,
"Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.",
encoding_iana,
)
continue
if encoding_iana in {"utf_7"} and not bom_or_sig_available:
logger.log(
TRACE,
"Encoding %s won't be tested as-is because detection is unreliable without BOM/SIG.",
encoding_iana,
)
continue
try:
is_multi_byte_decoder: bool = is_multi_byte_encoding(encoding_iana)
except (ModuleNotFoundError, ImportError):
logger.log(
TRACE,
"Encoding %s does not provide an IncrementalDecoder",
encoding_iana,
)
continue
try:
if is_too_large_sequence and is_multi_byte_decoder is False:
str(
(
sequences[: int(50e4)]
if strip_sig_or_bom is False
else sequences[len(sig_payload) : int(50e4)]
),
encoding=encoding_iana,
)
else:
decoded_payload = str(
(
sequences
if strip_sig_or_bom is False
else sequences[len(sig_payload) :]
),
encoding=encoding_iana,
)
except (UnicodeDecodeError, LookupError) as e:
if not isinstance(e, LookupError):
logger.log(
TRACE,
"Code page %s does not fit given bytes sequence at ALL. %s",
encoding_iana,
str(e),
)
tested_but_hard_failure.append(encoding_iana)
continue
similar_soft_failure_test: bool = False
for encoding_soft_failed in tested_but_soft_failure:
if is_cp_similar(encoding_iana, encoding_soft_failed):
similar_soft_failure_test = True
break
if similar_soft_failure_test:
logger.log(
TRACE,
"%s is deemed too similar to code page %s and was consider unsuited already. Continuing!",
encoding_iana,
encoding_soft_failed,
)
continue
r_ = range(
0 if not bom_or_sig_available else len(sig_payload),
length,
int(length / steps),
)
multi_byte_bonus: bool = (
is_multi_byte_decoder
and decoded_payload is not None
and len(decoded_payload) < length
)
if multi_byte_bonus:
logger.log(
TRACE,
"Code page %s is a multi byte encoding table and it appear that at least one character "
"was encoded using n-bytes.",
encoding_iana,
)
max_chunk_gave_up: int = int(len(r_) / 4)
max_chunk_gave_up = max(max_chunk_gave_up, 2)
early_stop_count: int = 0
lazy_str_hard_failure = False
md_chunks: list[str] = []
md_ratios = []
try:
for chunk in cut_sequence_chunks(
sequences,
encoding_iana,
r_,
chunk_size,
bom_or_sig_available,
strip_sig_or_bom,
sig_payload,
is_multi_byte_decoder,
decoded_payload,
):
md_chunks.append(chunk)
md_ratios.append(
mess_ratio(
chunk,
threshold,
explain is True and 1 <= len(cp_isolation) <= 2,
)
)
if md_ratios[-1] >= threshold:
early_stop_count += 1
if (early_stop_count >= max_chunk_gave_up) or (
bom_or_sig_available and strip_sig_or_bom is False
):
break
except (
UnicodeDecodeError
) as e: # Lazy str loading may have missed something there
logger.log(
TRACE,
"LazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %s",
encoding_iana,
str(e),
)
early_stop_count = max_chunk_gave_up
lazy_str_hard_failure = True
# We might want to check the sequence again with the whole content
# Only if initial MD tests passes
if (
not lazy_str_hard_failure
and is_too_large_sequence
and not is_multi_byte_decoder
):
try:
sequences[int(50e3) :].decode(encoding_iana, errors="strict")
except UnicodeDecodeError as e:
logger.log(
TRACE,
"LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %s",
encoding_iana,
str(e),
)
tested_but_hard_failure.append(encoding_iana)
continue
mean_mess_ratio: float = sum(md_ratios) / len(md_ratios) if md_ratios else 0.0
if mean_mess_ratio >= threshold or early_stop_count >= max_chunk_gave_up:
tested_but_soft_failure.append(encoding_iana)
logger.log(
TRACE,
"%s was excluded because of initial chaos probing. Gave up %i time(s). "
"Computed mean chaos is %f %%.",
encoding_iana,
early_stop_count,
round(mean_mess_ratio * 100, ndigits=3),
)
# Preparing those fallbacks in case we got nothing.
if (
enable_fallback
and encoding_iana
in ["ascii", "utf_8", specified_encoding, "utf_16", "utf_32"]
and not lazy_str_hard_failure
):
fallback_entry = CharsetMatch(
sequences,
encoding_iana,
threshold,
bom_or_sig_available,
[],
decoded_payload,
preemptive_declaration=specified_encoding,
)
if encoding_iana == specified_encoding:
fallback_specified = fallback_entry
elif encoding_iana == "ascii":
fallback_ascii = fallback_entry
else:
fallback_u8 = fallback_entry
continue
logger.log(
TRACE,
"%s passed initial chaos probing. Mean measured chaos is %f %%",
encoding_iana,
round(mean_mess_ratio * 100, ndigits=3),
)
if not is_multi_byte_decoder:
target_languages: list[str] = encoding_languages(encoding_iana)
else:
target_languages = mb_encoding_languages(encoding_iana)
if target_languages:
logger.log(
TRACE,
"{} should target any language(s) of {}".format(
encoding_iana, str(target_languages)
),
)
cd_ratios = []
# We shall skip the CD when it's about ASCII
# Most of the time it's not relevant to run "language-detection" on it.
if encoding_iana != "ascii":
for chunk in md_chunks:
chunk_languages = coherence_ratio(
chunk,
language_threshold,
",".join(target_languages) if target_languages else None,
)
cd_ratios.append(chunk_languages)
cd_ratios_merged = merge_coherence_ratios(cd_ratios)
if cd_ratios_merged:
logger.log(
TRACE,
"We detected language {} using {}".format(
cd_ratios_merged, encoding_iana
),
)
current_match = CharsetMatch(
sequences,
encoding_iana,
mean_mess_ratio,
bom_or_sig_available,
cd_ratios_merged,
(
decoded_payload
if (
is_too_large_sequence is False
or encoding_iana in [specified_encoding, "ascii", "utf_8"]
)
else None
),
preemptive_declaration=specified_encoding,
)
results.append(current_match)
if (
encoding_iana in [specified_encoding, "ascii", "utf_8"]
and mean_mess_ratio < 0.1
):
# If md says nothing to worry about, then... stop immediately!
if mean_mess_ratio == 0.0:
logger.debug(
"Encoding detection: %s is most likely the one.",
current_match.encoding,
)
if explain: # Defensive: ensure exit path clean handler
logger.removeHandler(explain_handler)
logger.setLevel(previous_logger_level)
return CharsetMatches([current_match])
early_stop_results.append(current_match)
if (
len(early_stop_results)
and (specified_encoding is None or specified_encoding in tested)
and "ascii" in tested
and "utf_8" in tested
):
probable_result: CharsetMatch = early_stop_results.best() # type: ignore[assignment]
logger.debug(
"Encoding detection: %s is most likely the one.",
probable_result.encoding,
)
if explain: # Defensive: ensure exit path clean handler
logger.removeHandler(explain_handler)
logger.setLevel(previous_logger_level)
return CharsetMatches([probable_result])
if encoding_iana == sig_encoding:
logger.debug(
"Encoding detection: %s is most likely the one as we detected a BOM or SIG within "
"the beginning of the sequence.",
encoding_iana,
)
if explain: # Defensive: ensure exit path clean handler
logger.removeHandler(explain_handler)
logger.setLevel(previous_logger_level)
return CharsetMatches([results[encoding_iana]])
if len(results) == 0:
if fallback_u8 or fallback_ascii or fallback_specified:
logger.log(
TRACE,
"Nothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.",
)
if fallback_specified:
logger.debug(
"Encoding detection: %s will be used as a fallback match",
fallback_specified.encoding,
)
results.append(fallback_specified)
elif (
(fallback_u8 and fallback_ascii is None)
or (
fallback_u8
and fallback_ascii
and fallback_u8.fingerprint != fallback_ascii.fingerprint
)
or (fallback_u8 is not None)
):
logger.debug("Encoding detection: utf_8 will be used as a fallback match")
results.append(fallback_u8)
elif fallback_ascii:
logger.debug("Encoding detection: ascii will be used as a fallback match")
results.append(fallback_ascii)
if results:
logger.debug(
"Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.",
results.best().encoding, # type: ignore
len(results) - 1,
)
else:
logger.debug("Encoding detection: Unable to determine any suitable charset.")
if explain:
logger.removeHandler(explain_handler)
logger.setLevel(previous_logger_level)
return results
def from_fp(
fp: BinaryIO,
steps: int = 5,
chunk_size: int = 512,
threshold: float = 0.20,
cp_isolation: list[str] | None = None,
cp_exclusion: list[str] | None = None,
preemptive_behaviour: bool = True,
explain: bool = False,
language_threshold: float = 0.1,
enable_fallback: bool = True,
) -> CharsetMatches:
"""
Same as the function from_bytes, but uses a file pointer that is already ready.
Will not close the file pointer.
"""
return from_bytes(
fp.read(),
steps,
chunk_size,
threshold,
cp_isolation,
cp_exclusion,
preemptive_behaviour,
explain,
language_threshold,
enable_fallback,
)
def from_path(
path: str | bytes | PathLike, # type: ignore[type-arg]
steps: int = 5,
chunk_size: int = 512,
threshold: float = 0.20,
cp_isolation: list[str] | None = None,
cp_exclusion: list[str] | None = None,
preemptive_behaviour: bool = True,
explain: bool = False,
language_threshold: float = 0.1,
enable_fallback: bool = True,
) -> CharsetMatches:
"""
Same as the function from_bytes, but with one extra step: opening and reading the given file path in binary mode.
Can raise IOError.
"""
with open(path, "rb") as fp:
return from_fp(
fp,
steps,
chunk_size,
threshold,
cp_isolation,
cp_exclusion,
preemptive_behaviour,
explain,
language_threshold,
enable_fallback,
)
def is_binary(
fp_or_path_or_payload: PathLike | str | BinaryIO | bytes, # type: ignore[type-arg]
steps: int = 5,
chunk_size: int = 512,
threshold: float = 0.20,
cp_isolation: list[str] | None = None,
cp_exclusion: list[str] | None = None,
preemptive_behaviour: bool = True,
explain: bool = False,
language_threshold: float = 0.1,
enable_fallback: bool = False,
) -> bool:
"""
Detect if the given input (file, bytes, or path) points to a binary file, i.e. not text.
Based on the same main heuristic algorithms and default kwargs, with the sole exception that fallback matches
are disabled, to be stricter around content that is ASCII-compatible but unlikely to be text.
"""
if isinstance(fp_or_path_or_payload, (str, PathLike)):
guesses = from_path(
fp_or_path_or_payload,
steps=steps,
chunk_size=chunk_size,
threshold=threshold,
cp_isolation=cp_isolation,
cp_exclusion=cp_exclusion,
preemptive_behaviour=preemptive_behaviour,
explain=explain,
language_threshold=language_threshold,
enable_fallback=enable_fallback,
)
elif isinstance(
fp_or_path_or_payload,
(
bytes,
bytearray,
),
):
guesses = from_bytes(
fp_or_path_or_payload,
steps=steps,
chunk_size=chunk_size,
threshold=threshold,
cp_isolation=cp_isolation,
cp_exclusion=cp_exclusion,
preemptive_behaviour=preemptive_behaviour,
explain=explain,
language_threshold=language_threshold,
enable_fallback=enable_fallback,
)
else:
guesses = from_fp(
fp_or_path_or_payload,
steps=steps,
chunk_size=chunk_size,
threshold=threshold,
cp_isolation=cp_isolation,
cp_exclusion=cp_exclusion,
preemptive_behaviour=preemptive_behaviour,
explain=explain,
language_threshold=language_threshold,
enable_fallback=enable_fallback,
)
return not guesses
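# Usage sketch for is_binary (the sample payloads are illustrative):
#
#   from charset_normalizer import is_binary
#   is_binary(b"Hello, world!")  # False: plain text decodes cleanly
#   is_binary(b"\x00" * 16)      # likely True: unprintable bytes, and
#                                # fallbacks are disabled here by default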

View File

@ -0,0 +1,395 @@
from __future__ import annotations
import importlib
from codecs import IncrementalDecoder
from collections import Counter
from functools import lru_cache
from typing import Counter as TypeCounter
from .constant import (
FREQUENCIES,
KO_NAMES,
LANGUAGE_SUPPORTED_COUNT,
TOO_SMALL_SEQUENCE,
ZH_NAMES,
)
from .md import is_suspiciously_successive_range
from .models import CoherenceMatches
from .utils import (
is_accentuated,
is_latin,
is_multi_byte_encoding,
is_unicode_range_secondary,
unicode_range,
)
def encoding_unicode_range(iana_name: str) -> list[str]:
"""
Return associated unicode ranges in a single byte code page.
"""
if is_multi_byte_encoding(iana_name):
raise OSError("Function not supported on multi-byte code page")
decoder = importlib.import_module(f"encodings.{iana_name}").IncrementalDecoder
p: IncrementalDecoder = decoder(errors="ignore")
seen_ranges: dict[str, int] = {}
character_count: int = 0
for i in range(0x40, 0xFF):
chunk: str = p.decode(bytes([i]))
if chunk:
character_range: str | None = unicode_range(chunk)
if character_range is None:
continue
if is_unicode_range_secondary(character_range) is False:
if character_range not in seen_ranges:
seen_ranges[character_range] = 0
seen_ranges[character_range] += 1
character_count += 1
return sorted(
[
character_range
for character_range in seen_ranges
if seen_ranges[character_range] / character_count >= 0.15
]
)
def unicode_range_languages(primary_range: str) -> list[str]:
"""
Return inferred languages used with a unicode range.
"""
languages: list[str] = []
for language, characters in FREQUENCIES.items():
for character in characters:
if unicode_range(character) == primary_range:
languages.append(language)
break
return languages
@lru_cache()
def encoding_languages(iana_name: str) -> list[str]:
"""
Single-byte encoding language association. Some code pages are heavily linked to particular language(s).
This function does the correspondence.
"""
unicode_ranges: list[str] = encoding_unicode_range(iana_name)
primary_range: str | None = None
for specified_range in unicode_ranges:
if "Latin" not in specified_range:
primary_range = specified_range
break
if primary_range is None:
return ["Latin Based"]
return unicode_range_languages(primary_range)
@lru_cache()
def mb_encoding_languages(iana_name: str) -> list[str]:
"""
Multi-byte encoding language association. Some code pages are heavily linked to particular language(s).
This function does the correspondence.
"""
if (
iana_name.startswith("shift_")
or iana_name.startswith("iso2022_jp")
or iana_name.startswith("euc_j")
or iana_name == "cp932"
):
return ["Japanese"]
if iana_name.startswith("gb") or iana_name in ZH_NAMES:
return ["Chinese"]
if iana_name.startswith("iso2022_kr") or iana_name in KO_NAMES:
return ["Korean"]
return []
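# Quick reference; these results follow directly from the branches above:
#   mb_encoding_languages("cp932")      -> ["Japanese"]
#   mb_encoding_languages("gb18030")    -> ["Chinese"]
#   mb_encoding_languages("iso2022_kr") -> ["Korean"]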
@lru_cache(maxsize=LANGUAGE_SUPPORTED_COUNT)
def get_target_features(language: str) -> tuple[bool, bool]:
"""
Determine the main aspects of a supported language: whether it contains accents and whether it is pure Latin.
"""
target_have_accents: bool = False
target_pure_latin: bool = True
for character in FREQUENCIES[language]:
if not target_have_accents and is_accentuated(character):
target_have_accents = True
if target_pure_latin and is_latin(character) is False:
target_pure_latin = False
return target_have_accents, target_pure_latin
def alphabet_languages(
characters: list[str], ignore_non_latin: bool = False
) -> list[str]:
"""
Return the languages associated with the given characters.
"""
languages: list[tuple[str, float]] = []
source_have_accents = any(is_accentuated(character) for character in characters)
for language, language_characters in FREQUENCIES.items():
target_have_accents, target_pure_latin = get_target_features(language)
if ignore_non_latin and target_pure_latin is False:
continue
if target_have_accents is False and source_have_accents:
continue
character_count: int = len(language_characters)
character_match_count: int = len(
[c for c in language_characters if c in characters]
)
ratio: float = character_match_count / character_count
if ratio >= 0.2:
languages.append((language, ratio))
languages = sorted(languages, key=lambda x: x[1], reverse=True)
return [compatible_language[0] for compatible_language in languages]
def characters_popularity_compare(
language: str, ordered_characters: list[str]
) -> float:
"""
Determine if an ordered character list (by occurrence, from most frequent to rarest) matches a particular language.
The result is a ratio between 0. (absolutely no correspondence) and 1. (near perfect fit).
Beware that this function is not strict on the match, in order to ease the detection. (Meaning a close match is 1.)
"""
if language not in FREQUENCIES:
raise ValueError(f"{language} not available")
character_approved_count: int = 0
FREQUENCIES_language_set = set(FREQUENCIES[language])
ordered_characters_count: int = len(ordered_characters)
target_language_characters_count: int = len(FREQUENCIES[language])
large_alphabet: bool = target_language_characters_count > 26
for character, character_rank in zip(
ordered_characters, range(0, ordered_characters_count)
):
if character not in FREQUENCIES_language_set:
continue
character_rank_in_language: int = FREQUENCIES[language].index(character)
expected_projection_ratio: float = (
target_language_characters_count / ordered_characters_count
)
character_rank_projection: int = int(character_rank * expected_projection_ratio)
if (
large_alphabet is False
and abs(character_rank_projection - character_rank_in_language) > 4
):
continue
if (
large_alphabet is True
and abs(character_rank_projection - character_rank_in_language)
< target_language_characters_count / 3
):
character_approved_count += 1
continue
characters_before_source: list[str] = FREQUENCIES[language][
0:character_rank_in_language
]
characters_after_source: list[str] = FREQUENCIES[language][
character_rank_in_language:
]
characters_before: list[str] = ordered_characters[0:character_rank]
characters_after: list[str] = ordered_characters[character_rank:]
before_match_count: int = len(
set(characters_before) & set(characters_before_source)
)
after_match_count: int = len(
set(characters_after) & set(characters_after_source)
)
if len(characters_before_source) == 0 and before_match_count <= 4:
character_approved_count += 1
continue
if len(characters_after_source) == 0 and after_match_count <= 4:
character_approved_count += 1
continue
if (
before_match_count / len(characters_before_source) >= 0.4
or after_match_count / len(characters_after_source) >= 0.4
):
character_approved_count += 1
continue
return character_approved_count / len(ordered_characters)
def alpha_unicode_split(decoded_sequence: str) -> list[str]:
"""
Given a decoded text sequence, return a list of str. Unicode range / alphabet separation.
Ex. a text containing English/Latin with a bit of Hebrew will return two items in the resulting list;
one containing the Latin letters and the other the Hebrew ones.
"""
layers: dict[str, str] = {}
for character in decoded_sequence:
if character.isalpha() is False:
continue
character_range: str | None = unicode_range(character)
if character_range is None:
continue
layer_target_range: str | None = None
for discovered_range in layers:
if (
is_suspiciously_successive_range(discovered_range, character_range)
is False
):
layer_target_range = discovered_range
break
if layer_target_range is None:
layer_target_range = character_range
if layer_target_range not in layers:
layers[layer_target_range] = character.lower()
continue
layers[layer_target_range] += character.lower()
return list(layers.values())
def merge_coherence_ratios(results: list[CoherenceMatches]) -> CoherenceMatches:
"""
This function merges results previously given by the function coherence_ratio.
The return type is the same as coherence_ratio.
"""
per_language_ratios: dict[str, list[float]] = {}
for result in results:
for sub_result in result:
language, ratio = sub_result
if language not in per_language_ratios:
per_language_ratios[language] = [ratio]
continue
per_language_ratios[language].append(ratio)
merge = [
(
language,
round(
sum(per_language_ratios[language]) / len(per_language_ratios[language]),
4,
),
)
for language in per_language_ratios
]
return sorted(merge, key=lambda x: x[1], reverse=True)
def filter_alt_coherence_matches(results: CoherenceMatches) -> CoherenceMatches:
"""
We shall NOT return "English—" in CoherenceMatches because it is an alternative
of "English". This function only keeps the best match and removes the em-dash from it.
"""
index_results: dict[str, list[float]] = dict()
for result in results:
language, ratio = result
no_em_name: str = language.replace("—", "")
if no_em_name not in index_results:
index_results[no_em_name] = []
index_results[no_em_name].append(ratio)
if any(len(index_results[e]) > 1 for e in index_results):
filtered_results: CoherenceMatches = []
for language in index_results:
filtered_results.append((language, max(index_results[language])))
return filtered_results
return results
@lru_cache(maxsize=2048)
def coherence_ratio(
decoded_sequence: str, threshold: float = 0.1, lg_inclusion: str | None = None
) -> CoherenceMatches:
"""
Detect ANY language that can be identified in given sequence. The sequence will be analysed by layers.
A layer = Character extraction by alphabets/ranges.
"""
results: list[tuple[str, float]] = []
ignore_non_latin: bool = False
sufficient_match_count: int = 0
lg_inclusion_list = lg_inclusion.split(",") if lg_inclusion is not None else []
if "Latin Based" in lg_inclusion_list:
ignore_non_latin = True
lg_inclusion_list.remove("Latin Based")
for layer in alpha_unicode_split(decoded_sequence):
sequence_frequencies: TypeCounter[str] = Counter(layer)
most_common = sequence_frequencies.most_common()
character_count: int = sum(o for c, o in most_common)
if character_count <= TOO_SMALL_SEQUENCE:
continue
popular_character_ordered: list[str] = [c for c, o in most_common]
for language in lg_inclusion_list or alphabet_languages(
popular_character_ordered, ignore_non_latin
):
ratio: float = characters_popularity_compare(
language, popular_character_ordered
)
if ratio < threshold:
continue
elif ratio >= 0.8:
sufficient_match_count += 1
results.append((language, round(ratio, 4)))
if sufficient_match_count >= 3:
break
return sorted(
filter_alt_coherence_matches(results), key=lambda x: x[1], reverse=True
)
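# Usage sketch (illustrative input): returns a list of (language, ratio)
# tuples sorted by ratio, descending, e.g.:
#
#   coherence_ratio("Tous les êtres humains naissent libres et égaux")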

View File

@ -0,0 +1,8 @@
from __future__ import annotations
from .__main__ import cli_detect, query_yes_no
__all__ = (
"cli_detect",
"query_yes_no",
)

View File

@ -0,0 +1,381 @@
from __future__ import annotations
import argparse
import sys
import typing
from json import dumps
from os.path import abspath, basename, dirname, join, realpath
from platform import python_version
from unicodedata import unidata_version
import charset_normalizer.md as md_module
from charset_normalizer import from_fp
from charset_normalizer.models import CliDetectionResult
from charset_normalizer.version import __version__
def query_yes_no(question: str, default: str = "yes") -> bool:
"""Ask a yes/no question via input() and return their answer.
"question" is a string that is presented to the user.
"default" is the presumed answer if the user just hits <Enter>.
It must be "yes" (the default), "no" or None (meaning
an answer is required of the user).
The "answer" return value is True for "yes" or False for "no".
Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input
"""
valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
if default is None:
prompt = " [y/n] "
elif default == "yes":
prompt = " [Y/n] "
elif default == "no":
prompt = " [y/N] "
else:
raise ValueError("invalid default answer: '%s'" % default)
while True:
sys.stdout.write(question + prompt)
choice = input().lower()
if default is not None and choice == "":
return valid[default]
elif choice in valid:
return valid[choice]
else:
sys.stdout.write("Please respond with 'yes' or 'no' (or 'y' or 'n').\n")
class FileType:
"""Factory for creating file object types
Instances of FileType are typically passed as type= arguments to the
ArgumentParser add_argument() method.
Keyword Arguments:
- mode -- A string indicating how the file is to be opened. Accepts the
same values as the builtin open() function.
- bufsize -- The file's desired buffer size. Accepts the same values as
the builtin open() function.
- encoding -- The file's encoding. Accepts the same values as the
builtin open() function.
- errors -- A string indicating how encoding and decoding errors are to
be handled. Accepts the same value as the builtin open() function.
Backported from CPython 3.12
"""
def __init__(
self,
mode: str = "r",
bufsize: int = -1,
encoding: str | None = None,
errors: str | None = None,
):
self._mode = mode
self._bufsize = bufsize
self._encoding = encoding
self._errors = errors
def __call__(self, string: str) -> typing.IO: # type: ignore[type-arg]
# the special argument "-" means sys.std{in,out}
if string == "-":
if "r" in self._mode:
return sys.stdin.buffer if "b" in self._mode else sys.stdin
elif any(c in self._mode for c in "wax"):
return sys.stdout.buffer if "b" in self._mode else sys.stdout
else:
msg = f'argument "-" with mode {self._mode}'
raise ValueError(msg)
# all other arguments are used as file names
try:
return open(string, self._mode, self._bufsize, self._encoding, self._errors)
except OSError as e:
message = f"can't open '{string}': {e}"
raise argparse.ArgumentTypeError(message)
def __repr__(self) -> str:
args = self._mode, self._bufsize
kwargs = [("encoding", self._encoding), ("errors", self._errors)]
args_str = ", ".join(
[repr(arg) for arg in args if arg != -1]
+ [f"{kw}={arg!r}" for kw, arg in kwargs if arg is not None]
)
return f"{type(self).__name__}({args_str})"
def cli_detect(argv: list[str] | None = None) -> int:
"""
CLI assistant using ARGV and ArgumentParser
:param argv:
:return: 0 if everything is fine, anything else signals trouble
"""
parser = argparse.ArgumentParser(
description="The Real First Universal Charset Detector. "
"Discover originating encoding used on text file. "
"Normalize text to unicode."
)
parser.add_argument(
"files", type=FileType("rb"), nargs="+", help="File(s) to be analysed"
)
parser.add_argument(
"-v",
"--verbose",
action="store_true",
default=False,
dest="verbose",
help="Display complementary information about file if any. "
"Stdout will contain logs about the detection process.",
)
parser.add_argument(
"-a",
"--with-alternative",
action="store_true",
default=False,
dest="alternatives",
help="Output complementary possibilities if any. Top-level JSON WILL be a list.",
)
parser.add_argument(
"-n",
"--normalize",
action="store_true",
default=False,
dest="normalize",
help="Permit to normalize input file. If not set, program does not write anything.",
)
parser.add_argument(
"-m",
"--minimal",
action="store_true",
default=False,
dest="minimal",
help="Only output the charset detected to STDOUT. Disabling JSON output.",
)
parser.add_argument(
"-r",
"--replace",
action="store_true",
default=False,
dest="replace",
help="Replace file when trying to normalize it instead of creating a new one.",
)
parser.add_argument(
"-f",
"--force",
action="store_true",
default=False,
dest="force",
help="Replace file without asking if you are sure, use this flag with caution.",
)
parser.add_argument(
"-i",
"--no-preemptive",
action="store_true",
default=False,
dest="no_preemptive",
help="Disable looking at a charset declaration to hint the detector.",
)
parser.add_argument(
"-t",
"--threshold",
action="store",
default=0.2,
type=float,
dest="threshold",
help="Define a custom maximum amount of noise allowed in decoded content. 0. <= noise <= 1.",
)
parser.add_argument(
"--version",
action="version",
version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format(
__version__,
python_version(),
unidata_version,
"OFF" if md_module.__file__.lower().endswith(".py") else "ON",
),
help="Show version information and exit.",
)
args = parser.parse_args(argv)
if args.replace is True and args.normalize is False:
if args.files:
for my_file in args.files:
my_file.close()
print("Use --replace in addition of --normalize only.", file=sys.stderr)
return 1
if args.force is True and args.replace is False:
if args.files:
for my_file in args.files:
my_file.close()
print("Use --force in addition of --replace only.", file=sys.stderr)
return 1
if args.threshold < 0.0 or args.threshold > 1.0:
if args.files:
for my_file in args.files:
my_file.close()
print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr)
return 1
x_ = []
for my_file in args.files:
matches = from_fp(
my_file,
threshold=args.threshold,
explain=args.verbose,
preemptive_behaviour=args.no_preemptive is False,
)
best_guess = matches.best()
if best_guess is None:
print(
'Unable to identify originating encoding for "{}". {}'.format(
my_file.name,
(
"Maybe try increasing maximum amount of chaos."
if args.threshold < 1.0
else ""
),
),
file=sys.stderr,
)
x_.append(
CliDetectionResult(
abspath(my_file.name),
None,
[],
[],
"Unknown",
[],
False,
1.0,
0.0,
None,
True,
)
)
else:
x_.append(
CliDetectionResult(
abspath(my_file.name),
best_guess.encoding,
best_guess.encoding_aliases,
[
cp
for cp in best_guess.could_be_from_charset
if cp != best_guess.encoding
],
best_guess.language,
best_guess.alphabets,
best_guess.bom,
best_guess.percent_chaos,
best_guess.percent_coherence,
None,
True,
)
)
if len(matches) > 1 and args.alternatives:
for el in matches:
if el != best_guess:
x_.append(
CliDetectionResult(
abspath(my_file.name),
el.encoding,
el.encoding_aliases,
[
cp
for cp in el.could_be_from_charset
if cp != el.encoding
],
el.language,
el.alphabets,
el.bom,
el.percent_chaos,
el.percent_coherence,
None,
False,
)
)
if args.normalize is True:
if best_guess.encoding.startswith("utf") is True:
print(
'"{}" file does not need to be normalized, as it already came from unicode.'.format(
my_file.name
),
file=sys.stderr,
)
if my_file.closed is False:
my_file.close()
continue
dir_path = dirname(realpath(my_file.name))
file_name = basename(realpath(my_file.name))
o_: list[str] = file_name.split(".")
if args.replace is False:
o_.insert(-1, best_guess.encoding)
if my_file.closed is False:
my_file.close()
elif (
args.force is False
and query_yes_no(
'Are you sure you want to normalize "{}" by replacing it?'.format(
my_file.name
),
"no",
)
is False
):
if my_file.closed is False:
my_file.close()
continue
try:
x_[0].unicode_path = join(dir_path, ".".join(o_))
with open(x_[0].unicode_path, "wb") as fp:
fp.write(best_guess.output())
except OSError as e:
print(str(e), file=sys.stderr)
if my_file.closed is False:
my_file.close()
return 2
if my_file.closed is False:
my_file.close()
if args.minimal is False:
print(
dumps(
[el.__dict__ for el in x_] if len(x_) > 1 else x_[0].__dict__,
ensure_ascii=True,
indent=4,
)
)
else:
for my_file in args.files:
print(
", ".join(
[
el.encoding or "undefined"
for el in x_
if el.path == abspath(my_file.name)
]
)
)
return 0
if __name__ == "__main__":
cli_detect()

Some files were not shown because too many files have changed in this diff.