import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
# from wordcloud import WordCloud
import json

# Load the annotation records and wrap them in a DataFrame for the analysis
# that follows.
file_path = "AV_OCR/annotations/full_annotations.json"  # Replace with your file path

# Decode explicitly as UTF-8 (the standard encoding for JSON files) so the
# result does not depend on the platform's default locale encoding.
with open(file_path, "r", encoding="utf-8") as file:
    data = json.load(file)
df = pd.DataFrame(data)

# Flatten the per-record 'matched_signs' values into one list and count how
# often each sign occurs.
# Falsy entries (None, empty list) are skipped. Floats are skipped as well:
# presumably the JSON is a list of records, in which case pandas fills a
# missing 'matched_signs' key with NaN, which is truthy but not iterable and
# would otherwise raise TypeError here.
all_signs = [
    sign
    for signs in df['matched_signs']
    if signs and not isinstance(signs, float)
    for sign in signs
]
sign_counts = Counter(all_signs)

# Rank signs from most to least frequent; signs with equal counts stay in the
# order they were first encountered.
ranked_signs = sign_counts.most_common()
sorted_sign_counts = dict(ranked_signs)

# Limit to the top_n most frequent signs
top_n = 30  # Adjust this value as needed
top_signs = dict(ranked_signs[:top_n])

# Draw a bar chart of the selected signs, one bar per sign, in the order
# stored in top_signs.
fsize = 24  # base font size; the title and x tick labels are derived from it

# Create the 8x8 inch figure and its single axes up front so the drawing
# calls below target the axes object explicitly.
fig, ax = plt.subplots(figsize=(8, 8))
ax.bar(list(top_signs.keys()), list(top_signs.values()), color="skyblue")

ax.set_xlabel('Traffic Signs', fontsize=fsize)
ax.set_ylabel('Frequency', fontsize=fsize)
ax.set_title(f'Top {top_n} Traffic Signs by Frequency', fontsize=fsize+10)

# The pyplot tick helpers act on the current axes, which is `ax` here.
# x tick labels are turned vertical and drawn slightly smaller than the rest.
plt.xticks(rotation=90, fontsize=fsize-5)
plt.yticks(fontsize=fsize)

# NOTE(review): tight_layout() is not applied (it was disabled in the
# original); confirm whether long rotated labels get clipped without it.
plt.show()