22 lines
649 B
Python
22 lines
649 B
Python
|
|
from dataclasses import dataclass
|
||
|
|
|
||
|
|
@dataclass
class ProcessingConfig:
    """Configuration for text processing pipeline.

    The first three fields have no default and must be supplied by the
    caller (positionally or by keyword); every other field is optional.
    Field order is part of the generated ``__init__`` signature, so new
    fields should be appended rather than inserted.

    NOTE(review): the per-field comments below are inferred from the field
    names only -- the code that consumes this config is not visible here.
    Confirm against the pipeline before relying on them.
    """

    # --- Required settings (no defaults) ---
    model_path: str
    data_directory: str
    filename_pattern: str

    # --- Batch sizes; presumably one per stage ("triple" / "concept") ---
    batch_size_triple: int = 16
    batch_size_concept: int = 64

    # Default output location is relative to the current working directory.
    output_directory: str = "./generation_result_debug"

    # --- Sharding; presumably `current_*` is a 0-based index below the
    # matching `total_*` -- TODO confirm, nothing here validates it ---
    total_shards_triple: int = 1
    current_shard_triple: int = 0
    total_shards_concept: int = 1
    current_shard_concept: int = 0

    # --- Feature flags and limits ---
    use_8bit: bool = False
    debug_mode: bool = False
    resume_from: int = 0
    record: bool = False
    max_new_tokens: int = 8192
    max_workers: int = 8
    remove_doc_spaces: bool = False
|