#!/usr/bin/python3
import sys
import datetime
import glob
import json
import os
import subprocess

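# Each site directory under the given <directory> must contain a
# settings.json. Based on the keys this script reads ("enabled" and
# "crawler"), a minimal file might look like this (the spider name is a
# hypothetical example, not a name the script requires):
#
#   {
#       "enabled": true,
#       "crawler": "exampleSpider"
#   }
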
#===============================================================================
def main(argv):
    '''
    Iterate over the crawler directories under <directory>, read each
    site's settings.json, and launch the matching spider, appending
    progress to a per-day log file.
    '''
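    # Remember the starting directory; the loop below chdirs into each
    # spider's directory and must return here afterwards.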
    cwd = os.getcwd()
    if len(argv) not in (2, 3):
        print(f"Usage: {argv[0]} <directory> [endDate:YYYY-MM-DD]")
        sys.exit(1)

    rootdir = argv[1]
    # The optional endDate argument is accepted per the usage string but is
    # never consumed below; it is captured here so the gap is explicit
    # rather than silently ignored.
    end_date = argv[2] if len(argv) == 3 else None

    # Ensure the log directory exists
    if not os.path.exists(rootdir):
        os.makedirs(rootdir)

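    # One log file per calendar day; mode "a" appends across multiple runs.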
    log_filename = os.path.join(rootdir, datetime.datetime.today().strftime('%Y-%m-%d') + ".log")
    with open(log_filename, "a") as logfile:
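        # The trailing '/' in the glob pattern matches only subdirectories,
        # one per crawler site; sorting keeps the run order deterministic.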
        for path in sorted(glob.glob(os.path.join(rootdir, '*/'))):
            print(path)
            settings_path = os.path.join(path, 'settings.json')
            if not os.path.exists(settings_path):
                logfile.write(f"\t configuration file <{settings_path}> not found.\n")
                continue

            with open(settings_path) as json_file:
                cfg = json.load(json_file)
                print(cfg)

            # Skip sites that are disabled or that lack the "enabled" flag
            # (cfg.get avoids a KeyError on incomplete configurations).
            if not cfg.get("enabled", False):
                continue

            logfile.write(f"Crawler {cfg['crawler']} started at: {datetime.datetime.now().strftime('%Y-%m-%d, %H:%M:%S')}\n")

            new_cwd = os.path.join("/home/debian/crawlersNoticias/spiders/", cfg["crawler"])
            if os.path.exists(new_cwd):
                os.chdir(new_cwd)
                try:
                    # Pass the site directory as an absolute path: after the
                    # chdir above, a relative <path> would no longer resolve.
                    # subprocess.run, unlike os.system, surfaces the child's
                    # exit status so failures can actually be logged (the
                    # original try/except around os.system never fired on a
                    # failing command).
                    result = subprocess.run(
                        ["python3", "../../../scripts/siteCrawler.py",
                         os.path.abspath(path)])
                    if result.returncode != 0:
                        logfile.write(f"\t siteCrawler.py exited with status {result.returncode}.\n")
                except OSError as e:
                    logfile.write(f"\t Error executing siteCrawler.py: {e}\n")
                finally:
                    os.chdir(cwd)
            else:
                logfile.write(f"\t spider <{new_cwd}> not found.\n")

#-------------------------------------------------------------------------------
if __name__ == "__main__":
    main(sys.argv)
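
#-------------------------------------------------------------------------------
# Example invocation (the script name and log directory are illustrative;
# the optional second argument is the endDate from the usage string):
#
#   python3 runCrawlers.py /home/debian/crawlersNoticias/logs 2024-05-01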