Merge pull request #1 from torrvision/fazl-pub

fbarez · web-flow · commit e3145b6bcd4b · 2024-11-25T09:18:07.000Z
Adding Fazl papers from 2023-2024
diff --git a/tvg_db/_publications/2023_measuring_value_alignment.md b/tvg_db/_publications/2023_measuring_value_alignment.md
@@ -0,0 +1,7 @@
+---
+title: "Measuring Value Alignment"
+year: 2023
+pdf_url: "https://arxiv.org/pdf/2312.15241"
+author_list: "Fazl Barez, Philip Torr"
+pub_in: arxiv
+---
diff --git a/tvg_db/_publications/2023_the_alan_turing_institutes_response_to_the_house_of_lords_large_language_models_call_for_evidence.md b/tvg_db/_publications/2023_the_alan_turing_institutes_response_to_the_house_of_lords_large_language_models_call_for_evidence.md
@@ -0,0 +1,7 @@
+---
+title: "The Alan Turing Institute’s response to the House of Lords Large Language Models Call for Evidence"
+year: 2023
+pdf_url: "https://www.turing.ac.uk/news/publications/alan-turing-institutes-response-house-lords-large-language-models-call-evidence"
+author_list: "Fazl Barez, Philip H. S. Torr, Aleksandar Petrov, Carolyn Ashurst, Jennifer Ding, Ardi Janjeva, Alexander Babuta, Morgan Briggs, Jonathan Bright, Stephanie Cairns, Miranda Cross, David Leslie, Helen Margetts, Deborah Morgan, Jacob Pratt, Vincent Straub, Christopher Thomas, Sophie Arana, Christopher Burr, Cassandra Gould Van Praag, Kalle Westerling, Kirstie Whitaker, Arielle Bennett, Malvika Sharan, Bastian Greshake Tzovaras, Ashley Van De Casteele, Matt Fuller"
+pub_in: "The Alan Turing Institute"
+---
diff --git a/tvg_db/_publications/2024_interpreting_learned_feedback_patterns_in_large_language_models.md b/tvg_db/_publications/2024_interpreting_learned_feedback_patterns_in_large_language_models.md
@@ -0,0 +1,8 @@
+---
+title: "Interpreting Learned Feedback Patterns in Large Language Models"
+year: 2024
+pdf_url: "https://openreview.net/pdf?id=xUoNgR1Byy"
+author_list: "Luke Marks, Amir Abdullah, Luna Mendez, Rauno Arike, David Krueger, Philip Torr, Fazl Barez"
+pub_in: "NeurIPS 2024"
+---
+
diff --git a/tvg_db/_publications/2024_poisonbench_assessing_large_language_model_vulnerability_to_data_poisoning.md b/tvg_db/_publications/2024_poisonbench_assessing_large_language_model_vulnerability_to_data_poisoning.md
@@ -0,0 +1,9 @@
+---
+title: "PoisonBench: Assessing Large Language Model Vulnerability to Data Poisoning"
+year: 2024
+pdf_url: "https://arxiv.org/pdf/2410.08811"
+author_list: "Tingchen Fu, Mrinank Sharma, Philip Torr, Shay B Cohen, David Krueger, Fazl Barez"
+pub_in: "arxiv"
+---
+
+
diff --git a/tvg_db/_publications/2024_sparse_autoencoders_reveal_universal_feature_spaces_across_large_language_models.md b/tvg_db/_publications/2024_sparse_autoencoders_reveal_universal_feature_spaces_across_large_language_models.md
@@ -0,0 +1,7 @@
+---
+title: "Sparse autoencoders reveal universal feature spaces across large language models"
+year: 2024
+pdf_url: "https://arxiv.org/pdf/2410.06981"
+author_list: "Michael Lan, Philip Torr, Austin Meek, Ashkan Khakzar, David Krueger, Fazl Barez"
+pub_in: arxiv
+---
diff --git a/tvg_db/_publications/2024_towards_Interpreting_visual_Information_processing_in_visionlanguage_models.md b/tvg_db/_publications/2024_towards_Interpreting_visual_Information_processing_in_visionlanguage_models.md
@@ -0,0 +1,7 @@
+---
+title: "Towards Interpreting Visual Information Processing in Vision-Language Models"
+year: 2024
+pdf_url: "https://arxiv.org/pdf/2410.07149"
+author_list: "Clement Neo, Luke Ong, Philip Torr, Mor Geva, David Krueger, Fazl Barez"
+pub_in: "arxiv"
+---
diff --git a/tvg_db/_publications/2024_towards_interpretable_sequence_continuation_analyzing_shared_circuits_in_large_language_models.md b/tvg_db/_publications/2024_towards_interpretable_sequence_continuation_analyzing_shared_circuits_in_large_language_models.md
@@ -0,0 +1,7 @@
+---
+title: "Towards Interpretable Sequence Continuation: Analyzing Shared Circuits in Large Language Models"
+year: 2024
+pdf_url: "https://aclanthology.org/2024.emnlp-main.699.pdf"
+author_list: "Michael Lan, Philip Torr, Fazl Barez"
+pub_in: "EMNLP 2024"
+---