Calculates the cosine similarity between two phrases. When you enter the phrases, do not use punctuation. Words are compared exactly as typed, so matching is case-sensitive (for example, "Cat" and "cat" count as different words).
Video from StatQuest (January 29, 2023): https://www.youtube.com/watch?v=e9U0QAFbfLI
# phrases program
# 2023-08-06 ews
"""Compute the cosine similarity between two phrases typed by the user.

Each phrase is split on whitespace into words. Both phrases are turned into
word-count vectors over their combined vocabulary, and the cosine of the
angle between those vectors is printed (0: no words in common, 1: all words
in common).
"""
from math import sqrt


# subroutines
def unique(l):
    """Return the distinct items of ``l`` in first-seen order.

    Membership is tested against a list rather than a set so that
    unhashable items keep working; this is quadratic but fine for phrases.
    """
    u = []
    for i in l:
        if i not in u:
            u.append(i)
    return u


def counta(lmain, lsrc):
    """Return, for each item of ``lmain``, how many times it occurs in ``lsrc``."""
    return [lsrc.count(i) for i in lmain]


def _sum_squares(v):
    """Return the sum of the squares of the numbers in ``v``."""
    return sum(i ** 2 for i in v)


def norm(v):
    """Return the Euclidean norm of ``v``, a list of numbers (word counts)."""
    return sqrt(_sum_squares(v))


def cosine_similarity(list1, list2):
    """Return the cosine similarity of two word lists.

    Both lists are counted against their combined vocabulary. When either
    list is empty its count vector has zero length, so the cosine is
    undefined; 0.0 is returned in that case (no words in common) instead
    of raising ZeroDivisionError.
    """
    vocabulary = unique(list1 + list2)
    counts1 = counta(vocabulary, list1)
    counts2 = counta(vocabulary, list2)

    # The counts are integers, so this product is exact. Taking a single
    # square root of it avoids the double rounding of sqrt(a) * sqrt(b),
    # which made identical phrases score 0.9999999999999998 instead of 1.0.
    squared_norms = _sum_squares(counts1) * _sum_squares(counts2)
    if squared_norms == 0:
        return 0.0

    dot = sum(a * b for a, b in zip(counts1, counts2))
    # no need to take the arccosine
    return dot / sqrt(squared_norms)


# main program
def main():
    """Prompt for two phrases and print their cosine similarity."""
    print("\nDo not use punctuation")
    str1 = input("phrase 1? ")
    str2 = input("phrase 2? ")

    # split into 2 lists
    list1 = str1.split()
    list2 = str2.split()

    c = cosine_similarity(list1, list2)
    print("cosine similarity: ")
    print(c)
    print("\n0: no words in common \n1: all words in common")


if __name__ == "__main__":
    main()