commit 37b3ef728bb10deb300e7f2f2b2e55de7dd6502d Author: Arthur Santos Date: Tue Aug 27 16:17:31 2024 -0300 hello alice diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e79221f --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +models/ +test/ + +.env/ +.git/ + +**/__pycache__/ + +*.wav diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/HELP.md b/docs/HELP.md new file mode 100644 index 0000000..6042412 --- /dev/null +++ b/docs/HELP.md @@ -0,0 +1,11 @@ +projeto_lab: python[3|3.10] src/main [--FLAG] + + Flags: + --debugging-video Abre o streaming de video. + --debugging-landmark Abre o streaming de video com reconhecimento de gestos. + --debugging-sound Abre o streaming de video e audio com Image2Sound. + --debugging-lab Abre o streaming de video com a maquina de estados. + --server Abre o debugging para server. + --client Abre o debugging para client. + Exemplo: + python3 src/main.py --debugging-video diff --git a/docs/INSTALL.md b/docs/INSTALL.md new file mode 100644 index 0000000..1e72bdc --- /dev/null +++ b/docs/INSTALL.md @@ -0,0 +1,3 @@ +* sudo apt install portaudio19-dev python3 python3-pip python3-dev +* pip install opencv-python mediapipe pyaudio synthesizer +* mkdir models/ && curl https://storage.googleapis.com/mediapipe-models/gesture_recognizer/gesture_recognizer/float16/latest/gesture_recognizer.task --output models/gesture_recognizer.task diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..aedac70 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +opencv-python +mediapipe +pyaudio +pygame +synthesizer diff --git a/sounds/kick.ogg b/sounds/kick.ogg new file mode 100644 index 0000000..853790e Binary files /dev/null and b/sounds/kick.ogg differ diff --git a/sounds/snare.ogg b/sounds/snare.ogg new file mode 100644 index 0000000..94e1bda Binary files /dev/null and b/sounds/snare.ogg differ diff --git a/src/entrypoint/cli.py b/src/entrypoint/cli.py new file mode 100644 index 0000000..4e60c84 --- /dev/null +++ 
b/src/entrypoint/cli.py @@ -0,0 +1,45 @@ +import os +import time + +from sistema.io import IO +from sistema.settings import ( DOCS_PATH ) + +from modulos.debug import Debug +from modulos.stream import Stream + +class CLI(object): + + def __init__(self): + pass + + + def handler(self, flag): + + match flag: + case '--debugging-video': + Debug.debug_video() + + case '--debugging-landmark': + Debug.debug_landmark() + + case '--debugging-sound': + Debug.debug_sound() + + case '--debugging-lab': + Debug.debug_lab() + + case '--server': + Stream(is_server=True).server() + + case '--client': + Stream(is_server=False).client() + + case '--help': + self.help() + + case _: + print('Flag não encontrada. Use --help para visualizar todas as opções') + exit(0) + + def help(self): + print(IO.read_as_utf8(DOCS_PATH, 'HELP.md'), end='') diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..08e6eca --- /dev/null +++ b/src/main.py @@ -0,0 +1,12 @@ +import sistema.settings + +from entrypoint.cli import CLI +from sys import argv + +if __name__ == '__main__': + + if len(argv) != 2: + print('Entrada ruim. 
Utilize python[3|3.10] src/main.py --help') + exit(-1) + + CLI().handler(argv[1]) diff --git a/src/modulos/debug.py b/src/modulos/debug.py new file mode 100644 index 0000000..fcd6fd8 --- /dev/null +++ b/src/modulos/debug.py @@ -0,0 +1,90 @@ +from sistema.landmark import Landmark +from sistema.sound import Sound +from sistema.video import Video +from sistema.fsm import State + +import numpy as np +import cv2 as cv +import threading +import time + +class Debug(object): + + def debug_landmark(): + v = Video(is_client=True) + l = Landmark() + + while True: + frame = v.serialize() + frame = v.deserialize(frame) + + frame, _, _, _ = l.recognize(frame) + + cv.imshow('Debug', frame) + + if cv.waitKey(1) == ord('q'): + break + + v.close() + + + def debug_sound(): + v = Video(is_client=True) + s = Sound() + l = Landmark() + + _start = time.time() + while True: + frame = v.serialize() + frame = v.deserialize(frame) + + frame, x, y, z = l.recognize(frame) + + _end = time.time() + if _end - self._start > 0.4: + x = threading.Thread(target=s.play, args=(x, y, z, )) + x.start() + self._start = _end + cv.imshow('Debug', frame) + + if cv.waitKey(1) == ord('q'): + break + + s.close() + v.close() + + + def debug_video(): + v = Video(is_client=True) + + while True: + frame = v.serialize() + frame = v.deserialize(frame) + + cv.imshow('Debug', frame) + + if cv.waitKey(1) == ord('q'): + break + + v.close() + + + def debug_lab(): + v = Video(is_client=True) + l = Landmark() + s = State() + + while True: + frame = v.serialize() + frame = v.deserialize(frame) + + frame, gesture_name, position_x = l.read_gesture(frame) + + s.check((gesture_name, position_x)) + + cv.imshow('Debug', frame) + + if cv.waitKey(1) == ord('q'): + break + + v.close() diff --git a/src/modulos/segmentation.py b/src/modulos/segmentation.py new file mode 100644 index 0000000..439e0c0 --- /dev/null +++ b/src/modulos/segmentation.py @@ -0,0 +1,30 @@ +# TODO +import numpy as np +import cv2 as cv + +from sistema.video 
import Video + +class Segmentation(object): + + def __init__(self): + pass + + + def depth_color_map(self, frame): + return frame + + + def debug(self): + v = Video() + + while True: + frame = v.serialize() + + frame = self.depth_color_map(frame) + + cv.imshow('Debug', frame) + + if cv.waitKey(1) == ord('q'): + break + + v.close() diff --git a/src/modulos/stream.py b/src/modulos/stream.py new file mode 100644 index 0000000..dbc637b --- /dev/null +++ b/src/modulos/stream.py @@ -0,0 +1,71 @@ +import struct +import socket +import cv2 as cv + +from sistema.video import Video +from sistema.settings import ( HOST, PORT ) + + +class Stream(object): + + def __init__(self, is_server=False): + self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + + if is_server: + self.socket.bind((HOST, PORT)) + self.socket.listen() + else: + self.socket.connect((HOST, PORT)) + + + def server(self): + v = Video(is_client=False) + conn, addr = self.socket.accept() + + PAYLOAD_SIZE = struct.calcsize("Q") + + streaming_data = b'' + + while True: + + while len(streaming_data) < PAYLOAD_SIZE: + packet = conn.recv(4 * 1024) + if not packet: break + streaming_data += packet + + packed_msg_size = streaming_data[:PAYLOAD_SIZE] + streaming_data = streaming_data[PAYLOAD_SIZE:] + video_msg_size = struct.unpack("Q", packed_msg_size)[0] + + while len(streaming_data) < video_msg_size: + streaming_data += conn.recv(4 * 1024) + + serialized_frame = streaming_data[:video_msg_size] + streaming_data = streaming_data[video_msg_size:] + + frame = v.deserialize(serialized_frame) + cv.imshow("Stream", frame) + + if cv.waitKey(1) == ord('q'): + break + + v.close() + self.close() + + + def client(self): + v = Video(is_client=True) + + while(True): + serialized_frame = v.serialize() + serialized_frame = struct.pack("Q", len(serialized_frame)) \ + + serialized_frame + + self.socket.sendall(serialized_frame) + + v.close() + self.close() + + + def close(self): + self.socket.close() diff --git 
a/src/sistema/fsm.py b/src/sistema/fsm.py new file mode 100644 index 0000000..cce3cbe --- /dev/null +++ b/src/sistema/fsm.py @@ -0,0 +1,103 @@ +import os +import pygame +from sistema.settings import ( SOUNDS_PATH ) + +import threading +import time + +class State(object): + + def __init__(self): + self.STATES = [ + 'normal', + 'insert', + 'edit', + 'visual', + 'exit' + ] + self.init_state = 'normal' + self.curr_state = self.init_state + self.end_state = 'exit' + + self.gesture_name = None + self.enter_state = True + self.can_play_audio = True + self._start = time.time() + self.position_x = 0 + + + def check(self, package): + + self.gesture_name = package[0] + self.position_x = package[1] + + print(self.position_x) + + match self.curr_state: + case 'normal': + self.normal_state() + case 'insert': + self.insert_state() + case 'exit': + self.exit_state() + + + def set_state(self, new_state): + if new_state != self.curr_state: + self.enter_state = True + + self.curr_state = new_state + + + def normal_state(self): + if self.enter_state: + self.can_play_audio = True + self.enter_state = False + + self.set_state(self.check_normal()) + + def insert_state(self): + if self.enter_state: + _end = time.time() + if _end - self._start > 0.4: + x = threading.Thread(target=self.play) + x.start() + self.can_play_audio = False + self._start = _end + self.enter_state = False + + self.set_state(self.check_insert()) + + def exit_state(self): + exit(0) + + + def check_normal(self): + new_state = self.curr_state + if self.gesture_name == 'Closed_Fist': + new_state = 'insert' + if self.curr_state in ['normal', 'insert'] and self.gesture_name == 'Victory': + new_state = 'exit' + return new_state + + + def check_insert(self): + new_state = self.curr_state + if self.gesture_name in ['Open_Palm', 'Thumb_Up']: + new_state = 'normal' + + if self.curr_state in ['normal', 'insert'] and self.gesture_name == 'Victory': + new_state = 'exit' + return new_state + + + def play(self): + 
pygame.mixer.init() + pygame.mixer.music.load(os.path.join(SOUNDS_PATH, 'kick.ogg')) + if self.position_x > 0.5: + pygame.mixer.music.load(os.path.join(SOUNDS_PATH, 'snare.ogg')) + else: + pygame.mixer.music.load(os.path.join(SOUNDS_PATH, 'kick.ogg')) + pygame.mixer.music.play() + pygame.time.delay(1000) + pygame.mixer.music.stop() diff --git a/src/sistema/io.py b/src/sistema/io.py new file mode 100644 index 0000000..33da770 --- /dev/null +++ b/src/sistema/io.py @@ -0,0 +1,27 @@ +import os +import json + + +class IO(object): + + def read_as_json(filepath, filename) -> dict: + with open(os.path.join(filepath, filename), 'r', encoding='utf8') as f: + data = json.load(f) + return data + + + def read_as_utf8(filepath, filename) -> str: + with open(os.path.join(filepath, filename), 'r', encoding='utf8') as f: + data = f.read() + return data + + + def update(filepath, filename, data): + with open(os.path.join(filepath, filename), 'w', encoding='utf8') as f: + json.dump( + obj=data, + fp=f, + ensure_ascii=False, + indent='\t', + separators=(',', ': ') + ) diff --git a/src/sistema/landmark.py b/src/sistema/landmark.py new file mode 100644 index 0000000..5e92518 --- /dev/null +++ b/src/sistema/landmark.py @@ -0,0 +1,115 @@ +import os +import mediapipe as mp + +from mediapipe.tasks import python +from mediapipe.tasks.python import vision +from mediapipe.framework.formats import landmark_pb2 + +from sistema.settings import ( MODELS_PATH ) + +class Landmark(object): + + def __init__(self): + VisionRunningMode = mp.tasks.vision.RunningMode + + self.mp_hands = mp.solutions.hands + self.mp_drawing = mp.solutions.drawing_utils + self.mp_drawing_styles = mp.solutions.drawing_styles + + + self.base_options = python.BaseOptions( + model_asset_path = os.path.join(MODELS_PATH, 'gesture_recognizer.task') + ) + + self.options = vision.GestureRecognizerOptions( + base_options = self.base_options, + num_hands = 2 + # running_mode = VisionRunningMode.VIDEO + ) + + self.recognizer = 
vision.GestureRecognizer.create_from_options( + self.options + ) + + + def recognize(self, frame): + mp_frame = mp.Image( + image_format=mp.ImageFormat.SRGB, + data=frame + ) + + recognition_result = self.recognizer.recognize( + mp_frame + ) + + if len(recognition_result.gestures) == 0: + return frame, 0, 0, 0 + + gesture = recognition_result.gestures[0][0] + gesture_name = gesture.category_name + hand_landmarks = recognition_result.hand_landmarks + + test_x = 0 + test_y = 0.0 + + for hand_landmark in hand_landmarks: + hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList() + hand_landmarks_proto.landmark.extend([ + landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmark]) + + self.mp_drawing.draw_landmarks( + frame, + hand_landmarks_proto, + self.mp_hands.HAND_CONNECTIONS, + self.mp_drawing_styles.get_default_hand_landmarks_style(), + self.mp_drawing_styles.get_default_hand_connections_style() + ) + + return frame, hand_landmarks[0][8].x * 100, hand_landmarks[0][8].y * 100, hand_landmarks[0][8].z * 10 + + + def read_gesture(self, frame): + mp_frame = mp.Image( + image_format=mp.ImageFormat.SRGB, + data=frame + ) + + recognition_result = self.recognizer.recognize( + mp_frame + ) + + if len(recognition_result.gestures) == 0: + return frame, None, 0 + + gesture = recognition_result.gestures[0][0] + gesture_name = gesture.category_name + hand_landmarks = recognition_result.hand_landmarks + + print(gesture_name) + + for hand_landmark in hand_landmarks: + hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList() + hand_landmarks_proto.landmark.extend( + [ + landmark_pb2.NormalizedLandmark( + x=landmark.x, + y=landmark.y, + z=landmark.z + ) + for landmark in hand_landmark + ] + ) + + self.mp_drawing.draw_landmarks( + frame, + hand_landmarks_proto, + self.mp_hands.HAND_CONNECTIONS, + self.mp_drawing_styles.get_default_hand_landmarks_style(), + self.mp_drawing_styles.get_default_hand_connections_style() + ) + + return 
frame, gesture_name, hand_landmarks[0][8].x + + + def serialize(self): + pass diff --git a/src/sistema/settings.py b/src/sistema/settings.py new file mode 100644 index 0000000..306337b --- /dev/null +++ b/src/sistema/settings.py @@ -0,0 +1,33 @@ +import os +from sys import path +from pathlib import Path +from platform import system + +BASE_PATH = Path(__file__).resolve().parent.parent.parent +SRC_PATH = Path(__file__).resolve().parent.parent + +path.append(str(SRC_PATH)) + +# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ +# GET SSH PUBKEY +SSH_PUBKEY = '' +if os.path.exists(f'{Path.home()}/.ssh/id_rsa.pub'): + with open(f'{Path.home()}/.ssh/id_rsa.pub', 'r') as f: + SSH_PUBKEY = f.read().replace('\n', '') +# ///////////////////////////////////////////////////////////////////////////// + +# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ +# ROOT FOLDERS PATH + +DOCS_PATH = os.path.join(BASE_PATH, 'docs') +MODELS_PATH = os.path.join(BASE_PATH, 'models') +SOUNDS_PATH = os.path.join(BASE_PATH, 'sounds') + +# ///////////////////////////////////////////////////////////////////////////// + +# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ +# SERVER SETTINGS + +HOST = '127.0.0.1' +PORT = 65432 +# ///////////////////////////////////////////////////////////////////////////// diff --git a/src/sistema/sound.py b/src/sistema/sound.py new file mode 100644 index 0000000..e01e2f8 --- /dev/null +++ b/src/sistema/sound.py @@ -0,0 +1,37 @@ +import pyaudio +from synthesizer import Player, Synthesizer, Waveform + +import numpy as np +from types import NoneType + +CHORD_FREQUENCIES = [ + 32.7, + 36.7, + 41.2, + 43.7, + 49.0, + 55.0, + 61.7 +] + + +class Sound(object): + + def __init__(self): + self.player = Player() + self.player.open_stream() + + self.synthesizer = Synthesizer(osc1_waveform=Waveform.sawtooth, osc1_volume=0.1, use_osc2=False) + + def play(self, x, y, z) -> NoneType: + + player = 
Player() + player.open_stream() + synthesizer = Synthesizer(osc1_waveform=Waveform.square, osc1_volume=0.1, use_osc2=False) + chord_mapped = [CHORD_FREQUENCIES[int(x // (100 / 7))] * 2**int(y // (100 / 7))] + player.play_wave(synthesizer.generate_chord(chord_mapped, abs(z))) + player._pyaudio.terminate() + + + def close(self): + pass diff --git a/src/sistema/video.py b/src/sistema/video.py new file mode 100644 index 0000000..9a7370b --- /dev/null +++ b/src/sistema/video.py @@ -0,0 +1,42 @@ +import pickle +import cv2 as cv +import numpy as np + +from types import NoneType + +class Video(object): + + def __init__(self, is_client): + self.camera = cv.VideoCapture(0) + + self.WIDTH = 640 + self.HEIGHT = 360 + + if is_client: + #TODO: Resizible dimensions + #self.WIDTH = int(self.camera.get(cv.CAP_PROP_FRAME_WIDTH)) + #self.HEIGHT = int(self.camera.get(cv.CAP_PROP_FRAME_HEIGHT)) + + #self.WIDTH = 640 + #self.HEIGHT = 360 + + if not self.camera.isOpened(): + raise('Não foi possível abrir a câmera.') + + + def serialize(self) -> bytes: + ret, frame = self.camera.read() + + if not ret: + raise('Não foi possível receber o frame.') + + return pickle.dumps(frame) + + + def deserialize(self, serialized_ndarray) -> np.ndarray: + return pickle.loads(serialized_ndarray) + + + def close(self) -> NoneType: + self.camera.release() + cv.destroyAllWindows()