codebase python
This commit is contained in:
parent
aa44215eb1
commit
759afbdd49
9995
_codebase_output.txt
Normal file
9995
_codebase_output.txt
Normal file
File diff suppressed because it is too large
Load Diff
362
_codebase_schemafile.sql
Normal file
362
_codebase_schemafile.sql
Normal file
@ -0,0 +1,362 @@
|
||||
-- MariaDB dump 10.19 Distrib 10.4.32-MariaDB, for Win64 (AMD64)
|
||||
--
|
||||
-- Host: localhost Database: tp_servicedesk
|
||||
-- ------------------------------------------------------
|
||||
-- Server version 10.4.32-MariaDB
|
||||
|
||||
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
|
||||
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
|
||||
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
|
||||
/*!40101 SET NAMES utf8mb4 */;
|
||||
/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
|
||||
/*!40103 SET TIME_ZONE='+00:00' */;
|
||||
/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
|
||||
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
|
||||
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
|
||||
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
|
||||
|
||||
--
|
||||
-- Table structure for table `attachments`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `attachments`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `attachments` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`path` text NOT NULL,
|
||||
`file_name` text NOT NULL,
|
||||
`ticket_id` int(11) DEFAULT NULL,
|
||||
`kb_id` int(11) DEFAULT NULL,
|
||||
`version_number` int(11) NOT NULL,
|
||||
`uploaded_by` int(11) NOT NULL,
|
||||
`created_at` timestamp NOT NULL DEFAULT current_timestamp(),
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `kb`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `kb`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `kb` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`title` text NOT NULL,
|
||||
`content` text NOT NULL,
|
||||
`created_by` int(11) NOT NULL,
|
||||
`updated_by` int(11) DEFAULT NULL,
|
||||
`created_at` timestamp NULL DEFAULT current_timestamp(),
|
||||
`updated_at` timestamp NULL DEFAULT NULL,
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `kb_tags`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `kb_tags`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `kb_tags` (
|
||||
`kb_id` int(11) NOT NULL,
|
||||
`tag_id` int(11) NOT NULL,
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `project_events`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `project_events`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `project_events` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`project_id` int(11) NOT NULL,
|
||||
`event_date` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
|
||||
`description` text NOT NULL,
|
||||
`created_by` int(11) NOT NULL,
|
||||
`created_at` timestamp NULL DEFAULT current_timestamp(),
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `project_links`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `project_links`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `project_links` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`project_id` int(11) NOT NULL,
|
||||
`url` text NOT NULL,
|
||||
`description` text NOT NULL,
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `project_tasks`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `project_tasks`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `project_tasks` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`project_id` int(11) NOT NULL,
|
||||
`title` varchar(255) NOT NULL,
|
||||
`description` text NOT NULL,
|
||||
`status` enum('pending','in_progress','done','') NOT NULL,
|
||||
`start_date` timestamp NULL DEFAULT NULL,
|
||||
`end_date` timestamp NULL DEFAULT NULL,
|
||||
`created_by` int(11) NOT NULL,
|
||||
`created_at` int(11) NOT NULL DEFAULT current_timestamp(),
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `projects`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `projects`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `projects` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`title` varchar(255) NOT NULL,
|
||||
`description` text NOT NULL,
|
||||
`requester` varchar(255) NOT NULL,
|
||||
`created_by` int(11) NOT NULL,
|
||||
`start_date` timestamp NULL DEFAULT NULL,
|
||||
`end_date` timestamp NULL DEFAULT NULL,
|
||||
`created_at` timestamp NOT NULL DEFAULT current_timestamp(),
|
||||
`updated_at` timestamp NULL DEFAULT NULL,
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `roles`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `roles`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `roles` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`role` varchar(50) NOT NULL,
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `sessions`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `sessions`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `sessions` (
|
||||
`session_id` varchar(255) NOT NULL,
|
||||
`data` text DEFAULT NULL,
|
||||
`ip` varchar(45) DEFAULT NULL,
|
||||
`agent` varchar(300) DEFAULT NULL,
|
||||
`stamp` int(11) DEFAULT NULL,
|
||||
PRIMARY KEY (`session_id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `tags`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `tags`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `tags` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`name` text NOT NULL,
|
||||
`color` text NOT NULL,
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=12 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `ticket_comments`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `ticket_comments`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `ticket_comments` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`ticket_id` int(11) NOT NULL,
|
||||
`comment` text NOT NULL,
|
||||
`created_by` int(11) DEFAULT NULL,
|
||||
`created_at` timestamp NULL DEFAULT current_timestamp(),
|
||||
`deleted` int(11) NOT NULL DEFAULT 0,
|
||||
PRIMARY KEY (`id`),
|
||||
KEY `ticket_id` (`ticket_id`)
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=21 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `ticket_meta`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `ticket_meta`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `ticket_meta` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`ticket_id` int(11) NOT NULL,
|
||||
`meta_key` text NOT NULL,
|
||||
`meta_value` text NOT NULL,
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=46 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `ticket_priorities`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `ticket_priorities`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `ticket_priorities` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`name` varchar(100) NOT NULL,
|
||||
`sort_order` int(11) NOT NULL DEFAULT 0,
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `ticket_relations`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `ticket_relations`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `ticket_relations` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`parent_ticket_id` int(11) NOT NULL,
|
||||
`child_ticket_id` int(11) NOT NULL,
|
||||
`created_at` timestamp NOT NULL DEFAULT current_timestamp(),
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `ticket_statuses`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `ticket_statuses`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `ticket_statuses` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`name` varchar(100) NOT NULL,
|
||||
`sort_order` int(11) NOT NULL DEFAULT 0,
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `ticket_tags`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `ticket_tags`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `ticket_tags` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`ticket_id` int(11) NOT NULL,
|
||||
`tag_id` int(11) NOT NULL,
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `ticket_updates`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `ticket_updates`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `ticket_updates` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`ticket_id` int(11) NOT NULL,
|
||||
`comment` text NOT NULL,
|
||||
`updated_by` int(11) NOT NULL,
|
||||
`created_at` timestamp NOT NULL DEFAULT current_timestamp(),
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `tickets`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `tickets`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `tickets` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`title` text NOT NULL,
|
||||
`description` text NOT NULL,
|
||||
`status_id` int(11) NOT NULL,
|
||||
`priority_id` int(11) NOT NULL,
|
||||
`created_by` int(11) NOT NULL,
|
||||
`assigned_to` int(11) DEFAULT NULL,
|
||||
`created_at` timestamp NULL DEFAULT current_timestamp(),
|
||||
`updated_at` timestamp NULL DEFAULT NULL,
|
||||
`updated_by` int(11) DEFAULT NULL,
|
||||
`project_id` int(11) DEFAULT NULL,
|
||||
`recycled` tinyint(1) NOT NULL DEFAULT 0,
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=100 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `users`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `users`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `users` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`username` text NOT NULL,
|
||||
`password` text NOT NULL,
|
||||
`email` text NOT NULL,
|
||||
`display_name` text NOT NULL,
|
||||
`role` int(11) NOT NULL,
|
||||
`created_at` timestamp NULL DEFAULT NULL,
|
||||
`updated_at` timestamp NULL DEFAULT NULL,
|
||||
`is_admin` tinyint(1) NOT NULL DEFAULT 0,
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY `username` (`username`) USING HASH
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
|
||||
|
||||
/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
|
||||
/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
|
||||
/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
|
||||
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
|
||||
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
|
||||
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
|
||||
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
|
||||
|
||||
-- Dump completed on 2025-04-30 0:35:57
|
||||
487
codebase-to-text.py
Normal file
487
codebase-to-text.py
Normal file
@ -0,0 +1,487 @@
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
import git # Still needed for potential future use or for checking if it's a repo, even if not cloning
|
||||
import shutil
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from docx import Document
|
||||
from pathspec import PathSpec
|
||||
from pathspec.patterns import GitWildMatchPattern # Explicit import often good practice
|
||||
|
||||
class CodebaseToText:
    """Flatten a codebase into one text (or docx) document.

    The document consists of a directory tree followed by the content of every
    file that survives the ignore rules. Ignore rules come from three places:
    the ``.gitignore`` in the input root, the caller-supplied ``ignored_paths``
    patterns, and (optionally) the "hidden" rule that drops any path with a
    component starting with ``.`` or ``__``. The ``.git`` directory and the
    output file itself are always skipped.

    ``input_path`` may be a local directory or a Git clone URL; URLs are cloned
    into a temporary directory that ``get_file`` removes when it is done.
    """

    # Host prefixes that mark the input as something to clone, not to walk.
    _REMOTE_PREFIXES = (
        "https://github.com/", "git@github.com:",
        "https://gitlab.com/", "git@gitlab.com:",
        "https://bitbucket.org/", "git@bitbucket.org:",
    )
    # One nesting level in the rendered tree (4 spaces per level).
    _INDENT = "    "

    def __init__(self, input_path, output_path, output_type, verbose, exclude_hidden, ignored_paths=None):
        """Store the configuration and load the ignore rules.

        Args:
            input_path: Local directory, or a Git clone URL.
            output_path: File the combined document is written to.
            output_type: ``"txt"`` or ``"docx"``.
            verbose: Verbosity level (0 quiet, 1 progress, 2 ignore reasons).
            exclude_hidden: Drop paths with a component starting with '.' or '__'.
            ignored_paths: Extra gitignore-style patterns to exclude.
        """
        raw_input = os.fspath(input_path)
        # abspath() would turn "https://host/x" into "<cwd>/https:/host/x",
        # which breaks both URL detection and the clone, so leave URLs alone.
        if "://" in raw_input or raw_input.startswith("git@"):
            self.input_path = raw_input
        else:
            self.input_path = os.path.abspath(raw_input)  # absolute for consistency
        self.output_path = output_path
        self.output_type = output_type
        self.verbose = verbose
        self.exclude_hidden = exclude_hidden
        self.ignored_paths = list(ignored_paths) if ignored_paths else []

        # Resolved once so the walk can recognise (and skip) the output file.
        self._output_abs = Path(output_path).resolve() if output_path else None

        self.temp_folder_path = None  # set only while a clone exists
        self.is_cloned_repo = False   # True once a clone succeeded

        self.git_ignore_spec = None
        self.custom_ignore_spec = None

        self._initialize_ignores()  # needs input_path to locate .gitignore

    def _initialize_ignores(self):
        """(Re)build the .gitignore spec and the custom-pattern spec.

        Both specs are reset first, so calling this again after ``input_path``
        changes (as ``get_text`` does after cloning) never leaves stale rules.
        """
        self.git_ignore_spec = None
        self.custom_ignore_spec = None

        gitignore_path = os.path.join(self.input_path, ".gitignore")
        if os.path.exists(gitignore_path):
            try:
                with open(gitignore_path, 'r', encoding='utf-8') as f:
                    lines = f.read().splitlines()
                # Drop blank lines and comments before compiling.
                lines = [line for line in lines if line.strip() and not line.strip().startswith('#')]
                if lines:
                    self.git_ignore_spec = PathSpec.from_lines(GitWildMatchPattern, lines)
                    if self.verbose:
                        print(f"Loaded .gitignore rules from: {gitignore_path}")
            except Exception as e:
                # Best effort: an unreadable .gitignore must not abort the run.
                print(f"Warning: Could not read .gitignore file at {gitignore_path}: {e}")
        elif self.verbose:
            print(f"No .gitignore file found at: {gitignore_path}")

        valid_custom_paths = [p for p in self.ignored_paths if p.strip()]
        if valid_custom_paths:
            self.custom_ignore_spec = PathSpec.from_lines(GitWildMatchPattern, valid_custom_paths)
            if self.verbose:
                print(f"Using custom ignore rules: {valid_custom_paths}")
        else:
            self.ignored_paths = []  # nothing usable was supplied

    def _root(self):
        """Return the resolved root directory, cached per ``input_path`` value."""
        cached = getattr(self, "_root_cache", None)
        if cached is None or cached[0] != self.input_path:
            cached = (self.input_path, Path(self.input_path).resolve())
            self._root_cache = cached
        return cached[1]

    def _relative_to_root(self, path):
        """Return ``path`` relative to the root as a ``Path``, or ``None``.

        Both the resolved root and the literal ``input_path`` are tried, so a
        symlinked root does not make every path look foreign.
        """
        candidate = Path(path)
        for base in (self._root(), Path(self.input_path)):
            try:
                return candidate.relative_to(base)
            except ValueError:
                continue
        return None

    def _display_path(self, path):
        """Return the root-relative form of ``path`` for messages (or ``path`` itself)."""
        relative = self._relative_to_root(path)
        return relative if relative is not None else Path(path)

    def _is_path_ignored(self, file_or_dir_path):
        """Return True when the path must be left out of the output.

        Checks, in order: the output file itself, .gitignore rules, custom
        rules, and the hidden-component rule. Paths outside the root are never
        ignored (a warning is printed when verbose).
        """
        target = Path(file_or_dir_path)
        relative = self._relative_to_root(target)
        if relative is None:
            if self.verbose:
                print(f"Warning: Path {file_or_dir_path} is not relative to {self.input_path}. Skipping ignore checks for it.")
            return False

        rel_path = relative.as_posix()  # pathspec expects POSIX separators

        # Never fold the previous run's output back into the new output.
        output_abs = getattr(self, "_output_abs", None)
        if output_abs is not None and self._root() / relative == output_abs:
            if self.verbose > 1:
                print(f"Ignoring '{rel_path}' (output file)")
            return True

        # Directory-only patterns such as "venv/" only match a path that ends
        # with a slash, so mark directories explicitly.
        match_path = f"{rel_path}/" if target.is_dir() else rel_path

        if self.git_ignore_spec and self.git_ignore_spec.match_file(match_path):
            if self.verbose > 1:
                print(f"Ignoring '{rel_path}' (gitignore)")
            return True

        if self.custom_ignore_spec and self.custom_ignore_spec.match_file(match_path):
            if self.verbose > 1:
                print(f"Ignoring '{rel_path}' (custom)")
            return True

        if self.exclude_hidden and self._is_hidden_path_component(target):
            if self.verbose > 1:
                print(f"Ignoring '{rel_path}' (hidden)")
            return True

        return False

    def _is_hidden_path_component(self, path_obj: Path):
        """Return True if any root-relative component starts with '.' or '__'.

        Note that the '__' prefix also covers names like ``__init__.py``.
        Paths outside the root are reported as not hidden.
        """
        relative = self._relative_to_root(path_obj)
        if relative is None:
            return False
        return any(part.startswith(('.', '__')) for part in relative.parts if part != '.')

    def _walk_kept(self, start, announce=False):
        """Walk ``start`` top-down, pruning ignored directories.

        Yields ``(directory, sorted_file_names)`` pairs. ``.git`` and ignored
        directories are removed before descent; the rest are visited in sorted
        order so repeated runs produce identical output.

        Args:
            start: Directory to walk.
            announce: Print a line for each pruned directory when verbose.
        """
        top = Path(start).resolve()
        for current, dirs, files in os.walk(top, topdown=True):
            here = Path(current)
            kept = []
            for name in sorted(dirs):
                child = here / name
                if name == ".git":
                    if announce and self.verbose > 1:
                        print(f"Skipping .git directory: {child}")
                    continue
                if self._is_path_ignored(str(child)):
                    if announce and self.verbose:
                        print(f"Ignoring directory: {self._display_path(child)}")
                    continue
                kept.append(name)
            dirs[:] = kept  # in-place assignment is what prunes os.walk
            yield here, sorted(files)

    def _parse_folder(self, folder_path):
        """Return the directory tree as text, honouring the ignore rules."""
        root = self._root()
        lines = []

        for current, files in self._walk_kept(folder_path, announce=True):
            relative = self._relative_to_root(current)
            if relative is None:
                print(f"Warning: Cannot determine relative path for {current}. Using level 0.")
                level = 0
            else:
                level = len(relative.parts)

            if current != root:
                lines.append(f"{self._INDENT * level}{current.name}/")
            elif not lines:
                # The root gets a single unindented marker line.
                lines.append(f"{Path(self.input_path).name}/")

            child_indent = self._INDENT * (level + 1)
            for name in files:
                candidate = current / name
                if not self._is_path_ignored(str(candidate)):
                    lines.append(f"{child_indent}{name}")
                elif self.verbose:
                    print(f"Ignoring file (in tree): {self._display_path(candidate)}")

        tree = "".join(f"{line}\n" for line in lines)

        if self.verbose:
            print(f"\n--- Generated File Tree ---\n{tree}")
            print("--- End File Tree ---\n")

        return tree

    def _get_file_contents(self, file_path):
        """Return the file's text; on failure return an error message string.

        UTF-8 is tried first, then latin-1 as a fallback for files that are not
        valid UTF-8.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read()
        except UnicodeDecodeError:
            try:
                with open(file_path, 'r', encoding='latin-1') as file:
                    if self.verbose:
                        print(f"Warning: Used fallback encoding 'latin-1' for {file_path}")
                    return file.read()
            except Exception as e:
                print(f"Error: Could not read file {file_path} with utf-8 or latin-1: {e}")
                return f"Error reading file: {e}"  # surfaced in the output
        except Exception as e:
            print(f"Error reading file {file_path}: {e}")
            return f"Error reading file: {e}"

    def _process_files(self, path_to_walk):
        """Return the concatenated, delimited content of every kept file."""
        chunks = []

        for current, files in self._walk_kept(path_to_walk):
            for file_name in files:
                file_path = current / file_name
                shown = self._display_path(file_path)

                if self._is_path_ignored(str(file_path)):
                    if self.verbose:
                        print(f"Ignoring file (content): {shown}")
                    continue

                try:
                    if self.verbose:
                        print(f"Processing: {shown}")
                    file_content = self._get_file_contents(str(file_path))
                    label = shown.as_posix()
                    chunks.extend((
                        f"\n\n--- File: {label} ---\n",
                        file_content,
                        f"\n--- End File: {label} ---\n",
                    ))
                except Exception as e:
                    print(f"Couldn't process {shown}: {e}")
                    chunks.append(f"\n\n--- Error processing file: {shown} --- \n {e} \n--- End Error ---\n")

        return "".join(chunks)

    def get_text(self):
        """Build and return the combined document.

        A remote input is cloned first; on clone failure a string starting with
        ``"Error:"`` is returned instead of the document. After a successful
        clone ``input_path`` points at the temporary checkout.
        """
        process_path = self.input_path
        if self.is_github_repo():
            if not self._clone_github_repo():
                print("Error: Failed to clone GitHub repository. Aborting.")
                return "Error: Could not clone repository."
            process_path = self.temp_folder_path
            self.is_cloned_repo = True
            # Ignore rules are relative to the root, which is now the checkout.
            self.input_path = process_path
            self._initialize_ignores()
            print(f"Processing cloned repo at: {process_path}")
        else:
            print(f"Processing local path: {process_path}")

        folder_structure = self._parse_folder(process_path)
        file_contents = self._process_files(process_path)

        folder_structure_header = "--- Folder Structure ---"
        file_contents_header = "--- File Contents ---"
        delimiter = "=" * 60

        return (
            f"{folder_structure_header}\n{folder_structure}\n{delimiter}\n\n"
            f"{file_contents_header}\n{file_contents}\n{delimiter}\nEnd of Codebase\n{delimiter}"
        )

    def get_file(self):
        """Generate the document and write it to ``output_path``.

        Write errors are reported, not raised. Any temporary clone is removed
        whether or not writing succeeded.
        """
        text_content = self.get_text()

        # get_text signals a failed clone with an "Error:" string.
        if text_content.startswith("Error:"):
            print(text_content)
            self.clean_up_temp_folder()
            return

        try:
            output_dir = os.path.dirname(self.output_path)
            if output_dir:  # empty when writing into the current directory
                os.makedirs(output_dir, exist_ok=True)

            if self.output_type == "txt":
                with open(self.output_path, "w", encoding='utf-8') as file:
                    file.write(text_content)
            elif self.output_type == "docx":
                doc = Document()
                # One paragraph per line keeps the text layout recognisable.
                for paragraph in text_content.split('\n'):
                    doc.add_paragraph(paragraph)
                doc.save(self.output_path)
            else:
                raise ValueError(f"Invalid output type '{self.output_type}'. Supported types: txt, docx")

            print(f"\nSuccessfully generated {self.output_type} file: {self.output_path}")

        except Exception as e:
            print(f"\nError writing output file {self.output_path}: {e}")

        finally:
            self.clean_up_temp_folder()

    #### GitHub ####
    def _clone_github_repo(self):
        """Clone ``input_path`` into a fresh temp directory; return success.

        On failure the temp directory is removed and ``temp_folder_path`` is
        reset to ``None``.
        """
        try:
            self.temp_folder_path = tempfile.mkdtemp(prefix="cbt_repo_")
            print(f"Cloning {self.input_path} into temporary folder {self.temp_folder_path}...")
            git.Repo.clone_from(self.input_path, self.temp_folder_path)
            if self.verbose:
                print("GitHub repository cloned successfully.")
            return True
        except git.GitCommandError as e:
            print(f"Error cloning GitHub repository: {e}")
        except Exception as e:
            print(f"An unexpected error occurred during cloning: {e}")
        # Reached only after a failure: discard the partial checkout.
        self.clean_up_temp_folder()
        self.temp_folder_path = None
        return False

    def is_github_repo(self):
        """Return True if ``input_path`` looks like a Git clone URL.

        Recognised by a known host prefix or by the conventional ``.git`` suffix.
        """
        return self.input_path.startswith(self._REMOTE_PREFIXES) \
            or self.input_path.endswith(".git")

    def clean_up_temp_folder(self):
        """Remove the temporary clone directory if one exists."""
        if self.temp_folder_path and os.path.exists(self.temp_folder_path):
            try:
                shutil.rmtree(self.temp_folder_path)
                if self.verbose:
                    print(f"Cleaned up temporary folder: {self.temp_folder_path}")
                self.temp_folder_path = None  # reset only after successful removal
            except Exception as e:
                print(f"Warning: Could not remove temporary folder {self.temp_folder_path}: {e}")
|
||||
|
||||
# --- Main Execution ---
|
||||
def main(argv=None):
    """Command-line entry point: parse arguments and write the output file.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the previous behaviour.
    """
    parser = argparse.ArgumentParser(
        description="Generate a single text or docx file from a codebase, respecting .gitignore and custom ignore rules.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter  # show defaults in --help
    )
    parser.add_argument(
        "input",
        help="Input path (local folder path or Git repository URL)."
    )
    parser.add_argument(
        "-o", "--output",
        required=True,
        help="Output file path (e.g., my_codebase.txt or output/report.docx)."
    )
    parser.add_argument(
        "-t", "--output_type",
        choices=["txt", "docx"],
        default="txt",
        help="Output file type."
    )
    parser.add_argument(
        "--ignore",
        nargs='*', default=[],  # zero or more gitignore-style patterns
        help="List of additional paths/patterns to ignore (e.g., 'dist/' '/node_modules' '*.log'). Matches relative to the input path root."
    )
    parser.add_argument(
        "--exclude_hidden",
        action="store_true",
        help="Exclude files and folders starting with '.' or '__'. Note that .gitignore or custom ignores might already cover these."
    )
    parser.add_argument(
        "-v", "--verbose",
        action="count", default=0,  # -v -> 1, -vv -> 2
        help="Increase output verbosity. -v for basic info, -vv for detailed ignore reasons."
    )
    args = parser.parse_args(argv)

    # argparse guarantees presence, not non-emptiness.
    if not args.input:
        parser.error("Input path cannot be empty.")
    if not args.output:
        parser.error("Output path cannot be empty.")

    code_to_text = None  # stays None if construction itself fails
    try:
        code_to_text = CodebaseToText(
            input_path=args.input,
            output_path=args.output,
            output_type=args.output_type,
            verbose=args.verbose,
            exclude_hidden=args.exclude_hidden,
            ignored_paths=args.ignore
        )
        code_to_text.get_file()

    except Exception as e:
        # Top-level boundary: report and still try to remove any temp clone.
        print(f"\nAn unexpected error occurred: {e}")
        if code_to_text:
            code_to_text.clean_up_temp_folder()
    # No finally block needed here: get_file() handles cleanup on its own paths.
|
||||
|
||||
def _sql_dump(database, export_file):
|
||||
command = [
|
||||
r'd:\xampp\mysql\bin\mysqldump.exe',
|
||||
'-u', 'root',
|
||||
'--no-data',
|
||||
database
|
||||
]
|
||||
|
||||
with open(export_file, 'w') as output_file:
|
||||
result = subprocess.run(command, stdout=output_file, stderr=subprocess.PIPE)
|
||||
|
||||
if(result.returncode == 0):
|
||||
print(f"Schema dump successful: {export_file}")
|
||||
else:
|
||||
print("Error occurred:", result.stderr.decode())
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # --- Example usage (swap for main() to get the CLI) ---
    #
    # To run from the command line, enable main() below and run e.g.:
    # python codebase_to_text.py . -o my_project.txt --ignore "dist/" "*.tmp" "/tests/data/" --exclude_hidden -v
    # python codebase_to_text.py https://github.com/user/repo.git -o repo_code.docx -t docx -vv

    # --- Direct call example (useful for testing) ---
    try:
        print("Running direct example...")
        example_output = "_codebase_output.txt"
        example_ignores = [
            "venv/",
            "*.log",
            # Ignore the file this run writes, otherwise every run would
            # swallow the previous run's output.
            f"/{example_output}",
            ".git/",
            "__pycache__/",
            "public/css/",
            "codebase-to-text.py",
        ]
        converter = CodebaseToText(
            input_path=".",
            output_path=example_output,
            output_type="txt",
            verbose=0,  # verbosity level (0, 1, or 2)
            exclude_hidden=True,
            ignored_paths=example_ignores
        )
        converter.get_file()
        print("Direct example finished.")

        _sql_dump('tp_servicedesk', '_codebase_schemafile.sql')
    except Exception as e:
        print(f"Error running direct example: {e}")

    # Uncomment the line below to enable command-line argument parsing when running the script directly
    # main()

    # To get a SQL dump by hand, use something similar to:
    # d:\xampp\mysql\bin\mysqldump.exe -u root --no-data tp_servicedesk > _codebase_schemafile.sql
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user