Initial commit

2025-12-05 11:27:16 -03:00
commit 804bacfbe3
87 changed files with 7260 additions and 0 deletions
--- a/gestion_pedidos/services/email_parser.py
+++ b/gestion_pedidos/services/email_parser.py
@@ -0,0 +1,104 @@
+"""
+Parser para emails de confirmación de pedidos a proveedores
+"""
+import email
+import email.errors
+import email.header
+import email.message
+import re
+from pathlib import Path
+from typing import Dict, List, Optional
+
+
+class EmailPedidoParser:
+    """Parse supplier order-confirmation emails (.eml) into plain dicts."""
+
+    def parse_email_file(self, email_path: Path) -> Dict:
+        """
+        Parse an .eml file and extract the order data it contains.
+
+        Args:
+            email_path: Path of the raw email message on disk.
+
+        Returns:
+            Dict with keys ``numero_pedido`` (str or None), ``proveedor_email``
+            (decoded From header), ``fecha`` (raw Date header), ``asunto``
+            (decoded Subject header), ``cuerpo`` (plain-text body) and
+            ``referencias`` (list of dicts, see ``_extract_referencias``).
+        """
+        with open(email_path, 'rb') as f:
+            msg = email.message_from_bytes(f.read())
+
+        # Headers may carry RFC 2047 encoded words ("=?utf-8?q?...?="); decode
+        # them so the order-number patterns see the real subject text.
+        subject = self._decode_header(msg.get('Subject', ''))
+        from_addr = self._decode_header(msg.get('From', ''))
+        body = self._get_email_body(msg)
+
+        return {
+            'numero_pedido': self._extract_numero_pedido(subject, body),
+            'proveedor_email': from_addr,
+            'fecha': msg.get('Date', ''),
+            'asunto': subject,
+            'cuerpo': body,
+            'referencias': self._extract_referencias(body),
+        }
+
+    @staticmethod
+    def _decode_header(raw) -> str:
+        """Return a header value as text, decoding RFC 2047 encoded words."""
+        try:
+            return str(email.header.make_header(email.header.decode_header(raw)))
+        except (email.errors.MessageError, LookupError, UnicodeError):
+            # Malformed encoded word or unknown charset: keep the raw value.
+            return str(raw)
+
+    def _get_email_body(self, msg: email.message.Message) -> str:
+        """
+        Return the text of the first decodable text/plain part.
+
+        A non-multipart message is decoded whatever its content type.
+        Returns an empty string when no payload can be decoded.
+        """
+        if msg.is_multipart():
+            parts = [p for p in msg.walk() if p.get_content_type() == "text/plain"]
+        else:
+            parts = [msg]
+
+        for part in parts:
+            payload = part.get_payload(decode=True)
+            if payload is None:  # container part, nothing to decode
+                continue
+            # Honour the declared charset; assuming UTF-8 would silently drop
+            # accented characters from e.g. ISO-8859-1 messages.
+            charset = part.get_content_charset() or 'utf-8'
+            try:
+                return payload.decode(charset, errors='ignore')
+            except (LookupError, ValueError):  # unknown or malformed charset name
+                return payload.decode('utf-8', errors='ignore')
+
+        return ""
+
+    def _extract_numero_pedido(self, subject: str, body: str) -> Optional[str]:
+        """
+        Return the first order number found in the subject or body.
+
+        Returns:
+            The captured order number with its original casing, or None when
+            no pattern matches.
+        """
+        # Tried in order; each pattern is searched over the whole text.
+        patterns = [
+            r'pedido[:\s]+([A-Z0-9\-]+)',
+            r'pedido[:\s]+#?(\d+)',
+            r'ref[:\s]+([A-Z0-9\-]+)',
+            r'order[:\s]+([A-Z0-9\-]+)',
+        ]
+
+        # Do not lowercase the text: IGNORECASE already handles the keywords,
+        # and lowercasing would return "ped-123" for an order "PED-123".
+        text = f"{subject} {body}"
+
+        for pattern in patterns:
+            match = re.search(pattern, text, re.IGNORECASE)
+            if match:
+                return match.group(1).strip()
+
+        return None
+
+    def _extract_referencias(self, body: str) -> List[Dict]:
+        """
+        Extract item lines shaped like ``REF123 - Description - Quantity``.
+
+        Returns:
+            One dict per match with keys ``referencia`` (str),
+            ``denominacion`` (str) and ``unidades`` (int); empty list when
+            nothing matches.
+        """
+        # Separators may be a hyphen or an en dash; the description itself
+        # cannot contain a hyphen or a newline.
+        pattern = r'([A-Z0-9\-]+)\s*[-–]\s*([^-\n]+?)\s*[-–]\s*(\d+)'
+
+        return [
+            {
+                'referencia': match.group(1).strip(),
+                'denominacion': match.group(2).strip(),
+                'unidades': int(match.group(3)),
+            }
+            for match in re.finditer(pattern, body, re.IGNORECASE | re.MULTILINE)
+        ]
+