더북(TheBook)
import re

# Reload the training data so preprocessing starts again from the raw CSV.
df_train = pd.read_csv(DATA_PATH + "train.csv")

# Regex for URLs: either an http(s):// link or a bare "www." address, each
# running up to the next whitespace character (so trailing punctuation that
# is glued to the URL is matched as part of it).
url_pattern = r"https?://\S+|www\.\S+"

# Boolean mask of the comments that contain at least one URL.
# na=False maps missing comments to False; without it a NaN in
# "comment_text" leaves NaN in the mask and the .loc lookup below raises.
index_has_url = df_train["comment_text"].str.contains(url_pattern, na=False)
text_has_url = df_train.loc[index_has_url, "comment_text"]

# Pick one URL-bearing comment to try the pattern on.
sample = text_has_url.iloc[14]

>>> print(f"Sample:\n {sample}\n")
>>> print(f"Remove URL:\n {re.sub(url_pattern, '', sample)}")
Sample:
 https://en.wikipedia.org/wiki/John_Williams, born 1932. Other movie credits include Jaws, ET, Indiana Jones (several), Jurassic Park, Schindler's List.  Doesn't get much better than that.  As for SW release date https://en.wikipedia.org/wiki/Star_Wars_%28film%29.

Remove URL:
  born 1932. Other movie credits include Jaws, ET, Indiana Jones (several), Jurassic Park, Schindler's List.  Doesn't get much better than that.  As for SW release date