first commit
@@ -0,0 +1,160 @@
import csv

from tqdm import tqdm


def check_created_csv_header(keyword, csv_dir):
    keyword_to_paths = {
        'cc_en': {
            'node_with_numeric_id': f"{csv_dir}/triple_nodes_cc_en_from_json_without_emb_with_numeric_id.csv",
            'edge_with_numeric_id': f"{csv_dir}/triple_edges_cc_en_from_json_without_emb_with_numeric_id.csv",
            'text_with_numeric_id': f"{csv_dir}/text_nodes_cc_en_from_json_with_numeric_id.csv",
            'concept_with_numeric_id': f"{csv_dir}/concept_nodes_pes2o_abstract_from_json_without_emb_with_numeric_id.csv",
        },
        'pes2o_abstract': {
            'node_with_numeric_id': f"{csv_dir}/triple_nodes_pes2o_abstract_from_json_without_emb_with_numeric_id.csv",
            'edge_with_numeric_id': f"{csv_dir}/triple_edges_pes2o_abstract_from_json_without_emb_full_concept_with_numeric_id.csv",
            'text_with_numeric_id': f"{csv_dir}/text_nodes_pes2o_abstract_from_json_with_numeric_id.csv",
        },
        'en_simple_wiki_v0': {
            'node_with_numeric_id': f"{csv_dir}/triple_nodes_en_simple_wiki_v0_from_json_without_emb_with_numeric_id.csv",
            'edge_with_numeric_id': f"{csv_dir}/triple_edges_en_simple_wiki_v0_from_json_without_emb_full_concept_with_numeric_id.csv",
            'text_with_numeric_id': f"{csv_dir}/text_nodes_en_simple_wiki_v0_from_json_with_numeric_id.csv",
        },
    }
    for key, path in keyword_to_paths[keyword].items():
        with open(path) as infile:
            reader = csv.reader(infile)
            header = next(reader)
            print(f"Header of {key}: {header}")

            # Print the first data row as a sanity check
            for i, row in enumerate(reader):
                if i < 1:
                    print(row)
                else:
                    break


def add_csv_columns(node_csv, edge_csv, text_csv, node_with_numeric_id, edge_with_numeric_id, text_with_numeric_id):
    # Nodes: insert a numeric_id column before ':LABEL'
    with open(node_csv) as infile, open(node_with_numeric_id, 'w', newline='') as outfile:
        reader = csv.reader(infile)
        writer = csv.writer(outfile)
        header = next(reader)
        print(header)
        label_index = header.index(':LABEL')
        header.insert(label_index, 'numeric_id')  # Add new column name
        writer.writerow(header)
        for row_number, row in tqdm(enumerate(reader), desc="Adding numeric ID"):
            row.insert(label_index, row_number)  # Add numeric ID before ':LABEL'
            writer.writerow(row)

    # Edges: edge headers use ':TYPE' instead of ':LABEL'
    with open(edge_csv) as infile, open(edge_with_numeric_id, 'w', newline='') as outfile:
        reader = csv.reader(infile)
        writer = csv.writer(outfile)
        header = next(reader)
        print(header)
        type_index = header.index(':TYPE')
        header.insert(type_index, 'numeric_id')  # Add new column name
        writer.writerow(header)
        for row_number, row in tqdm(enumerate(reader), desc="Adding numeric ID"):
            row.insert(type_index, row_number)  # Add numeric ID before ':TYPE'
            writer.writerow(row)

    # Text nodes: same ':LABEL' convention as regular nodes
    with open(text_csv) as infile, open(text_with_numeric_id, 'w', newline='') as outfile:
        reader = csv.reader(infile)
        writer = csv.writer(outfile)
        header = next(reader)
        print(header)
        label_index = header.index(':LABEL')
        header.insert(label_index, 'numeric_id')  # Add new column name
        writer.writerow(header)
        for row_number, row in tqdm(enumerate(reader), desc="Adding numeric ID"):
            row.insert(label_index, row_number)  # Add numeric ID before ':LABEL'
            writer.writerow(row)
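

# The three blocks above repeat one pattern. A minimal refactor sketch
# (a hypothetical helper, not part of this commit) that captures it; the
# anchor column is ':LABEL' for node/text files and ':TYPE' for edge files:
def _add_numeric_id_column(in_path, out_path, anchor_col):
    with open(in_path) as infile, open(out_path, 'w', newline='') as outfile:
        reader = csv.reader(infile)
        writer = csv.writer(outfile)
        header = next(reader)
        anchor_index = header.index(anchor_col)
        header.insert(anchor_index, 'numeric_id')
        writer.writerow(header)
        for row_number, row in tqdm(enumerate(reader), desc="Adding numeric ID"):
            row.insert(anchor_index, row_number)
            writer.writerow(row)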


if __name__ == "__main__":
    keyword = "en_simple_wiki_v0"
    csv_dir = "./import"  # Change this to your CSV directory
    # Build the input paths for the selected keyword and derive the output
    # paths by appending the _with_numeric_id suffix.
    node_csv = f"{csv_dir}/triple_nodes_{keyword}_from_json_without_emb.csv"
    edge_csv = f"{csv_dir}/triple_edges_{keyword}_from_json_without_emb_full_concept.csv"
    text_csv = f"{csv_dir}/text_nodes_{keyword}_from_json.csv"
    add_csv_columns(
        node_csv, edge_csv, text_csv,
        node_csv.replace(".csv", "_with_numeric_id.csv"),
        edge_csv.replace(".csv", "_with_numeric_id.csv"),
        text_csv.replace(".csv", "_with_numeric_id.csv"),
    )
    # check_created_csv_header(keyword, csv_dir)
@@ -0,0 +1,189 @@
import networkx as nx
import csv
import ast
import hashlib
import os
from atlas_rag.kg_construction.triple_config import ProcessingConfig
import pickle


def get_node_id(entity_name, entity_to_id=None):
    """Return the existing node ID, or create a new one, for an entity using a hash-based approach."""
    # Avoid a mutable default argument: a dict shared across calls is a
    # classic Python pitfall.
    if entity_to_id is None:
        entity_to_id = {}
    if entity_name not in entity_to_id:
        # Use a hash function to generate a deterministic, unique ID
        hash_object = hashlib.sha256(entity_name.encode('utf-8'))
        hash_hex = hash_object.hexdigest()  # Hexadecimal representation of the hash
        entity_to_id[entity_name] = hash_hex
    return entity_to_id[entity_name]
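
# Because IDs derive from sha256 of the name alone, the mapping is
# deterministic across runs and across files; e.g. (illustrative only):
#   get_node_id("Alice") == get_node_id("Alice")  # True, even with fresh dicts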


def csvs_to_temp_graphml(triple_node_file, triple_edge_file, config: ProcessingConfig = None):
    g = nx.DiGraph()
    entity_to_id = {}

    # Add triple nodes
    with open(triple_node_file, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            node_id = row["name:ID"]
            mapped_id = get_node_id(node_id, entity_to_id)
            # Check if node already exists to prevent duplicates
            if mapped_id not in g.nodes:
                g.add_node(mapped_id, id=node_id, type=row["type"])

    # Add triple edges
    with open(triple_edge_file, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            start_id = get_node_id(row[":START_ID"], entity_to_id)
            end_id = get_node_id(row[":END_ID"], entity_to_id)
            # Check if edge already exists to prevent duplicates
            if not g.has_edge(start_id, end_id):
                g.add_edge(start_id, end_id, relation=row["relation"], type=row[":TYPE"])

    # Save the intermediate graph (without concept nodes) to a pickle file
    output_name = f"{config.output_directory}/kg_graphml/{config.filename_pattern}_without_concept.pkl"
    # Create the output directory if it does not exist
    output_dir = os.path.dirname(output_name)
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)
    with open(output_name, 'wb') as output_file:
        pickle.dump(g, output_file)
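

def _load_temp_graph(pkl_path):
    # Hedged convenience sketch (not part of the original commit): read back
    # the pickle written by csvs_to_temp_graphml for quick inspection.
    with open(pkl_path, 'rb') as f:
        g = pickle.load(f)
    print(f"{g.number_of_nodes()} nodes, {g.number_of_edges()} edges")
    return g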


def csvs_to_graphml(triple_node_file, text_node_file, concept_node_file,
                    triple_edge_file, text_edge_file, concept_edge_file,
                    output_file):
    '''
    Convert multiple CSV files into a single GraphML file.

    Types of nodes to be added to the graph:
    - Triple nodes: Nodes representing triples, with properties like subject, predicate, object.
    - Text nodes: Nodes representing text, with properties like text content.
    - Concept nodes: Nodes representing concepts, with properties like concept name and type.

    Types of edges to be added to the graph:
    - Triple edges: Edges representing relationships between triples, with properties like relation type.
    - Text edges: Edges representing relationships between text and nodes, with properties like text type.
    - Concept edges: Edges representing relationships between concepts and nodes, with properties like concept type.

    networkx DiGraph attributes:
    Node:
    - type: Type of the node (e.g., entity, event, text, concept).
    - file_id: List of text IDs the node is associated with.
    - id: Node name.
    Edge:
    - relation: Relation name.
    - file_id: List of text IDs the edge is associated with.
    - type: Type of the edge (e.g., Source, Relation, Concept).
    - synsets: List of synsets associated with the edge.
    '''
    g = nx.DiGraph()
    entity_to_id = {}

    # Add triple nodes
    with open(triple_node_file, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            node_id = row["name:ID"]
            mapped_id = get_node_id(node_id, entity_to_id)
            # Check if node already exists to prevent duplicates
            if mapped_id not in g.nodes:
                g.add_node(mapped_id, id=node_id, type=row["type"])

    # Add text nodes
    with open(text_node_file, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            node_id = row["text_id:ID"]
            # Check if node already exists to prevent duplicates
            if node_id not in g.nodes:
                g.add_node(node_id, file_id=node_id, id=row["original_text"], type="passage")

    # Add concept nodes
    with open(concept_node_file, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            node_id = row["concept_id:ID"]
            # Check if node already exists to prevent duplicates
            if node_id not in g.nodes:
                g.add_node(node_id, file_id="concept_file", id=row["name"], type="concept")

    # file_id for triple and concept nodes is filled in while adding the edges

    # Add triple edges
    with open(triple_edge_file, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            start_id = get_node_id(row[":START_ID"], entity_to_id)
            end_id = get_node_id(row[":END_ID"], entity_to_id)
            # Check if edge already exists to prevent duplicates
            if not g.has_edge(start_id, end_id):
                g.add_edge(start_id, end_id, relation=row["relation"], type=row[":TYPE"])
            # Add file_id to start and end nodes if they are triple or concept nodes
            for node_id in [start_id, end_id]:
                if g.nodes[node_id]['type'] in ['triple', 'concept'] and 'file_id' not in g.nodes[node_id]:
                    g.nodes[node_id]['file_id'] = row.get("file_id", "triple_file")

            # Add concepts to the edge, avoiding duplicates
            concepts = ast.literal_eval(row["concepts"])
            for concept in concepts:
                if "concepts" not in g.edges[start_id, end_id]:
                    g.edges[start_id, end_id]['concepts'] = str(concept)
                else:
                    current_concepts = g.edges[start_id, end_id]['concepts'].split(",")
                    if str(concept) not in current_concepts:
                        g.edges[start_id, end_id]['concepts'] += "," + str(concept)
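
    # Note on the expected "concepts" format (an assumption implied by the
    # ast.literal_eval call above): each cell holds a Python-literal list,
    # e.g. "['animal', 'mammal']".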

    # Add text edges
    with open(text_edge_file, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            start_id = get_node_id(row[":START_ID"], entity_to_id)
            end_id = row[":END_ID"]
            # Check if edge already exists to prevent duplicates
            if not g.has_edge(start_id, end_id):
                g.add_edge(start_id, end_id, relation="mention in", type=row[":TYPE"])
            # Record the text file_id on the start node
            if 'file_id' in g.nodes[start_id]:
                g.nodes[start_id]['file_id'] += "," + str(end_id)
            else:
                g.nodes[start_id]['file_id'] = str(end_id)

    # Add concept edges between triple nodes and concept nodes
    with open(concept_edge_file, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            start_id = get_node_id(row[":START_ID"], entity_to_id)
            end_id = row[":END_ID"]  # The end ID is a concept node ID
            # Check if edge already exists to prevent duplicates
            if not g.has_edge(start_id, end_id):
                g.add_edge(start_id, end_id, relation=row["relation"], type=row[":TYPE"])

    # Write to GraphML, creating the output directory if needed
    output_dir = os.path.dirname(output_file)
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)
    nx.write_graphml(g, output_file, infer_numeric_types=True)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description='Convert CSV files to GraphML format.')
    parser.add_argument('--triple_node_file', type=str, required=True, help='Path to the triple node CSV file.')
    parser.add_argument('--text_node_file', type=str, required=True, help='Path to the text node CSV file.')
    parser.add_argument('--concept_node_file', type=str, required=True, help='Path to the concept node CSV file.')
    parser.add_argument('--triple_edge_file', type=str, required=True, help='Path to the triple edge CSV file.')
    parser.add_argument('--text_edge_file', type=str, required=True, help='Path to the text edge CSV file.')
    parser.add_argument('--concept_edge_file', type=str, required=True, help='Path to the concept edge CSV file.')
    parser.add_argument('--output_file', type=str, required=True, help='Path to the output GraphML file.')

    args = parser.parse_args()

    csvs_to_graphml(args.triple_node_file, args.text_node_file, args.concept_node_file,
                    args.triple_edge_file, args.text_edge_file, args.concept_edge_file,
                    args.output_file)
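
    # Example invocation (hypothetical script and file names, for illustration only):
    #   python csvs_to_graphml.py \
    #       --triple_node_file triple_nodes.csv --text_node_file text_nodes.csv \
    #       --concept_node_file concept_nodes.csv --triple_edge_file triple_edges.csv \
    #       --text_edge_file text_edges.csv --concept_edge_file concept_edges.csv \
    #       --output_file kg.graphml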
@@ -0,0 +1,70 @@
import pandas as pd
import numpy as np
from ast import literal_eval  # Safer string-to-list conversion
import os

CHUNKSIZE = 100_000  # Adjust based on your RAM (100K rows per chunk)
EMBEDDING_COL = "embedding:STRING"  # Column name with embeddings
# DIMENSION = 32  # Update with your embedding dimension
ENTITY_ONLY = True


def parse_embedding(embed_str):
    """Convert an embedding string to a float32 numpy array."""
    # literal_eval handles the bracketed list syntax safely
    return np.array(literal_eval(embed_str), dtype=np.float32)
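
# For instance (illustrative): parse_embedding("[0.1, 0.2]")
# returns array([0.1, 0.2], dtype=float32).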


# Convert the embedding column of a CSV to a .npy file, chunk by chunk
def convert_csv_to_npy(csv_path, npy_path):
    total_embeddings = 0
    # Create the output directory if it does not exist
    os.makedirs(os.path.dirname(npy_path), exist_ok=True)

    with open(npy_path, "wb") as f:
        pass  # Initialize an empty file

    # Process the CSV in chunks to bound memory usage
    for chunk_idx, df_chunk in enumerate(
        pd.read_csv(csv_path, chunksize=CHUNKSIZE, usecols=[EMBEDDING_COL])
    ):
        # Parse embeddings
        embeddings = np.stack(
            df_chunk[EMBEDDING_COL].apply(parse_embedding).values
        )

        # Verify dimensions
        # assert embeddings.shape[1] == DIMENSION, \
        #     f"Dimension mismatch at chunk {chunk_idx}"
        total_embeddings += embeddings.shape[0]
        # Append this chunk to the .npy file
        with open(npy_path, "ab") as f:
            np.save(f, embeddings.astype(np.float32))

        print(f"Processed chunk {chunk_idx} ({total_embeddings} rows so far)")
    print(f"Total number of embeddings: {total_embeddings}")
    print("Conversion complete!")
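

def load_npy_segments(npy_path):
    # Hedged helper (an assumption, not part of the original commit): because
    # the converter appends one np.save block per chunk, reading the file back
    # requires repeated np.load calls until end-of-file.
    arrays = []
    with open(npy_path, "rb") as f:
        while True:
            try:
                arrays.append(np.load(f))
            except (EOFError, ValueError, OSError):
                break  # No more saved arrays in the file
    return np.concatenate(arrays) if arrays else np.empty((0,), dtype=np.float32)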


if __name__ == "__main__":
    keyword = 'cc_en'  # Change this to your desired keyword
    csv_dir = "./import"  # Change this to your CSV directory
    keyword_to_paths = {
        'cc_en': {
            'node_csv': f"{csv_dir}/triple_nodes_cc_en_from_json_2.csv",
            # 'edge_csv': f"{csv_dir}/triple_edges_cc_en_from_json_2.csv",
            'text_csv': f"{csv_dir}/text_nodes_cc_en_from_json_with_emb.csv",
        },
        'pes2o_abstract': {
            'node_csv': f"{csv_dir}/triple_nodes_pes2o_abstract_from_json.csv",
            # 'edge_csv': f"{csv_dir}/triple_edges_pes2o_abstract_from_json.csv",
            'text_csv': f"{csv_dir}/text_nodes_pes2o_abstract_from_json_with_emb.csv",
        },
        'en_simple_wiki_v0': {
            'node_csv': f"{csv_dir}/triple_nodes_en_simple_wiki_v0_from_json.csv",
            # 'edge_csv': f"{csv_dir}/triple_edges_en_simple_wiki_v0_from_json.csv",
            'text_csv': f"{csv_dir}/text_nodes_en_simple_wiki_v0_from_json_with_emb.csv",
        },
    }
    for key, path in keyword_to_paths[keyword].items():
        npy_path = path.replace(".csv", ".npy")
        convert_csv_to_npy(path, npy_path)
        print(f"Converted {path} to {npy_path}")
@@ -0,0 +1,27 @@
import os
import glob


def merge_csv_files(output_file, input_dir):
    """
    Merge all CSV files in the input directory into a single output file.

    Args:
        output_file (str): Path to the output CSV file.
        input_dir (str): Directory containing the input CSV files.
    """
    # Delete the output file if it exists
    if os.path.exists(output_file):
        os.remove(output_file)

    # Write the header once, then append the data rows of every input CSV
    with open(output_file, 'w') as outfile:
        outfile.write("node,conceptualized_node,node_type\n")
        for csv_file in glob.glob(os.path.join(input_dir, '*.csv')):
            with open(csv_file, 'r') as infile:
                next(infile, None)  # Skip the header line (tolerates empty files)
                outfile.writelines(infile)
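

if __name__ == "__main__":
    # Hedged usage example (the paths are hypothetical): merge every
    # concept CSV under ./concept_csvs into one file.
    merge_csv_files("./merged_concepts.csv", "./concept_csvs")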